40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
50#define DEBUG_TYPE "si-lower"
55 "amdgpu-disable-loop-alignment",
56 cl::desc(
"Do not align and prefetch loops"),
60 "amdgpu-use-divergent-register-indexing",
62 cl::desc(
"Use indirect register addressing for divergent indexes"),
76 unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
77 for (
unsigned Reg = 0; Reg < NumSGPRs; ++
Reg) {
79 return AMDGPU::SGPR0 +
Reg;
195 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
196 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
197 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
198 MVT::i1, MVT::v32i32},
202 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
203 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
204 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
205 MVT::i1, MVT::v32i32},
212 ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FSQRT, ISD::FCBRT,
213 ISD::FSIN, ISD::FCOS, ISD::FPOW, ISD::FPOWI,
214 ISD::FLDEXP, ISD::FFREXP, ISD::FLOG, ISD::FLOG2,
215 ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
216 ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT,
274 {MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1},
Expand);
281 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
282 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
283 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32},
286 {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
287 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v9f32,
288 MVT::v10f32, MVT::v11f32, MVT::v12f32, MVT::v16f32},
292 {MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
293 MVT::v3i16, MVT::v4i16, MVT::Other},
298 {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64},
Expand);
314 {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
315 MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
316 MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
317 MVT::v4f16, MVT::v4bf16, MVT::v3i64, MVT::v3f64, MVT::v6i32,
318 MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
319 MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, MVT::v16f16,
320 MVT::v16bf16, MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32,
321 MVT::v32i16, MVT::v32f16, MVT::v32bf16}) {
353 for (
MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
367 for (
MVT Vec64 : { MVT::v3i64, MVT::v3f64 }) {
381 for (
MVT Vec64 : { MVT::v4i64, MVT::v4f64 }) {
395 for (
MVT Vec64 : { MVT::v8i64, MVT::v8f64 }) {
409 for (
MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) {
424 {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32},
433 {MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::v2i8, MVT::v4i8,
434 MVT::v8i8, MVT::v4i16, MVT::v4f16, MVT::v4bf16},
439 {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32},
Custom);
443 {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
444 MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
445 MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
446 MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
527 {MVT::f32, MVT::f64},
Legal);
601 ISD::FSIN, ISD::FROUND},
620 {MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::v4i16, MVT::v4f16,
621 MVT::v4bf16, MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16,
622 MVT::v16f16, MVT::v16bf16, MVT::v32i16, MVT::v32f16}) {
758 {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
762 {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
766 {MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, MVT::v16f16,
767 MVT::v16bf16, MVT::v32i16, MVT::v32f16, MVT::v32bf16}) {
789 {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
790 MVT::v16f16, MVT::v16i16, MVT::v32f16, MVT::v32i16},
793 for (
MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16, MVT::v32i16})
801 for (
MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
817 {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32},
837 {MVT::v4i16, MVT::v4f16, MVT::v4bf16, MVT::v2i8, MVT::v4i8,
838 MVT::v8i8, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
839 MVT::v16i16, MVT::v16f16, MVT::v16bf16, MVT::v32i16,
840 MVT::v32f16, MVT::v32bf16},
856 {MVT::f16, MVT::f32, MVT::f64, MVT::v2f16},
Legal);
858 {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
863 {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
864 MVT::bf16, MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::i128,
869 {MVT::v2f16, MVT::v2i16, MVT::v2bf16, MVT::v3f16,
870 MVT::v3i16, MVT::v4f16, MVT::v4i16, MVT::v4bf16,
871 MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::Other, MVT::f16,
872 MVT::i16, MVT::bf16, MVT::i8, MVT::i128},
876 {MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v2bf16,
877 MVT::v3i16, MVT::v3f16, MVT::v4f16, MVT::v4i16,
878 MVT::v4bf16, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
879 MVT::f16, MVT::i16, MVT::bf16, MVT::i8, MVT::i128},
945 ISD::ATOMIC_CMP_SWAP,
946 ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
948 ISD::ATOMIC_LOAD_ADD,
949 ISD::ATOMIC_LOAD_SUB,
950 ISD::ATOMIC_LOAD_AND,
952 ISD::ATOMIC_LOAD_XOR,
953 ISD::ATOMIC_LOAD_NAND,
954 ISD::ATOMIC_LOAD_MIN,
955 ISD::ATOMIC_LOAD_MAX,
956 ISD::ATOMIC_LOAD_UMIN,
957 ISD::ATOMIC_LOAD_UMAX,
958 ISD::ATOMIC_LOAD_FADD,
959 ISD::ATOMIC_LOAD_FMIN,
960 ISD::ATOMIC_LOAD_FMAX,
961 ISD::ATOMIC_LOAD_UINC_WRAP,
962 ISD::ATOMIC_LOAD_UDEC_WRAP,
977 static const MCPhysReg RCRegs[] = {AMDGPU::MODE};
990 EVT DestVT,
EVT SrcVT)
const {
1000 LLT DestTy,
LLT SrcTy)
const {
1001 return ((Opcode == TargetOpcode::G_FMAD && Subtarget->
hasMadMixInsts()) ||
1002 (Opcode == TargetOpcode::G_FMA && Subtarget->
hasFmaMixInsts())) &&
1004 SrcTy.getScalarSizeInBits() == 16 &&
1028 return (ScalarVT == MVT::bf16 ? MVT::i32 : MVT::v2f16);
1030 return VT.
isInteger() ? MVT::i32 : MVT::f32;
1057 return (NumElts + 1) / 2;
1063 return NumElts * ((
Size + 31) / 32);
1072 EVT VT,
EVT &IntermediateVT,
1073 unsigned &NumIntermediates,
MVT &RegisterVT)
const {
1082 if (ScalarVT == MVT::bf16) {
1083 RegisterVT = MVT::i32;
1084 IntermediateVT = MVT::v2bf16;
1086 RegisterVT = VT.
isInteger() ? MVT::v2i16 : MVT::v2f16;
1087 IntermediateVT = RegisterVT;
1089 NumIntermediates = (NumElts + 1) / 2;
1090 return NumIntermediates;
1095 IntermediateVT = RegisterVT;
1096 NumIntermediates = NumElts;
1097 return NumIntermediates;
1102 RegisterVT = MVT::i16;
1103 IntermediateVT = ScalarVT;
1104 NumIntermediates = NumElts;
1105 return NumIntermediates;
1110 RegisterVT = MVT::i32;
1111 IntermediateVT = ScalarVT;
1112 NumIntermediates = NumElts;
1113 return NumIntermediates;
1117 RegisterVT = MVT::i32;
1118 IntermediateVT = RegisterVT;
1119 NumIntermediates = NumElts * ((
Size + 31) / 32);
1120 return NumIntermediates;
1125 Context,
CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
1130 unsigned MaxNumLanes) {
1131 assert(MaxNumLanes != 0);
1135 unsigned NumElts = std::min(MaxNumLanes, VT->getNumElements());
1146 unsigned MaxNumLanes) {
1152 assert(ST->getNumContainedTypes() == 2 &&
1153 ST->getContainedType(1)->isIntegerTy(32));
1168 DL.getPointerSizeInBits(AS) == 192)
1178 DL.getPointerSizeInBits(AS) == 160) ||
1180 DL.getPointerSizeInBits(AS) == 192))
1188 unsigned IntrID)
const {
1190 if (CI.
hasMetadata(LLVMContext::MD_invariant_load))
1205 if (RsrcIntr->IsImage) {
1220 Info.ptrVal = RsrcArg;
1223 bool IsSPrefetch = IntrID == Intrinsic::amdgcn_s_buffer_prefetch_data;
1232 if (RsrcIntr->IsImage) {
1233 unsigned MaxNumLanes = 4;
1248 std::numeric_limits<unsigned>::max());
1258 if (RsrcIntr->IsImage) {
1279 if ((RsrcIntr->IsImage && BaseOpcode->
NoReturn) || IsSPrefetch) {
1281 Info.memVT = MVT::i32;
1288 case Intrinsic::amdgcn_raw_buffer_load_lds:
1289 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
1290 case Intrinsic::amdgcn_struct_buffer_load_lds:
1291 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
1297 case Intrinsic::amdgcn_raw_atomic_buffer_load:
1298 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
1299 case Intrinsic::amdgcn_struct_atomic_buffer_load:
1300 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
1303 std::numeric_limits<unsigned>::max());
1313 case Intrinsic::amdgcn_ds_ordered_add:
1314 case Intrinsic::amdgcn_ds_ordered_swap: {
1327 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
1328 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
1331 Info.ptrVal =
nullptr;
1336 case Intrinsic::amdgcn_ds_append:
1337 case Intrinsic::amdgcn_ds_consume: {
1350 case Intrinsic::amdgcn_global_atomic_csub: {
1360 case Intrinsic::amdgcn_image_bvh_intersect_ray: {
1370 case Intrinsic::amdgcn_global_atomic_fmin:
1371 case Intrinsic::amdgcn_global_atomic_fmax:
1372 case Intrinsic::amdgcn_global_atomic_fmin_num:
1373 case Intrinsic::amdgcn_global_atomic_fmax_num:
1374 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
1375 case Intrinsic::amdgcn_flat_atomic_fmin:
1376 case Intrinsic::amdgcn_flat_atomic_fmax:
1377 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1378 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1379 case Intrinsic::amdgcn_atomic_cond_sub_u32: {
1390 case Intrinsic::amdgcn_global_load_tr_b64:
1391 case Intrinsic::amdgcn_global_load_tr_b128: {
1399 case Intrinsic::amdgcn_ds_gws_init:
1400 case Intrinsic::amdgcn_ds_gws_barrier:
1401 case Intrinsic::amdgcn_ds_gws_sema_v:
1402 case Intrinsic::amdgcn_ds_gws_sema_br:
1403 case Intrinsic::amdgcn_ds_gws_sema_p:
1404 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
1414 Info.memVT = MVT::i32;
1416 Info.align =
Align(4);
1418 if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
1424 case Intrinsic::amdgcn_global_load_lds: {
1432 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
1442 Info.memVT = MVT::i32;
1444 Info.align =
Align(4);
1449 case Intrinsic::amdgcn_s_prefetch_data: {
1464 case Intrinsic::amdgcn_addrspacecast_nonnull: {
1467 unsigned SrcAS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1468 unsigned DstAS =
I.getType()->getPointerAddressSpace();
1480 Type *&AccessTy)
const {
1482 switch (
II->getIntrinsicID()) {
1483 case Intrinsic::amdgcn_atomic_cond_sub_u32:
1484 case Intrinsic::amdgcn_ds_append:
1485 case Intrinsic::amdgcn_ds_consume:
1486 case Intrinsic::amdgcn_ds_ordered_add:
1487 case Intrinsic::amdgcn_ds_ordered_swap:
1488 case Intrinsic::amdgcn_flat_atomic_fmax:
1489 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1490 case Intrinsic::amdgcn_flat_atomic_fmin:
1491 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1492 case Intrinsic::amdgcn_global_atomic_csub:
1493 case Intrinsic::amdgcn_global_atomic_fmax:
1494 case Intrinsic::amdgcn_global_atomic_fmax_num:
1495 case Intrinsic::amdgcn_global_atomic_fmin:
1496 case Intrinsic::amdgcn_global_atomic_fmin_num:
1497 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
1498 case Intrinsic::amdgcn_global_load_tr_b64:
1499 case Intrinsic::amdgcn_global_load_tr_b128:
1500 Ptr =
II->getArgOperand(0);
1502 case Intrinsic::amdgcn_global_load_lds:
1503 Ptr =
II->getArgOperand(1);
1508 AccessTy =
II->getType();
1514 unsigned AddrSpace)
const {
1526 return AM.
Scale == 0 &&
1528 AM.
BaseOffs, AddrSpace, FlatVariant));
1548 return isLegalMUBUFAddressingMode(AM);
1551bool SITargetLowering::isLegalMUBUFAddressingMode(
const AddrMode &AM)
const {
1562 if (!
TII->isLegalMUBUFImmOffset(AM.BaseOffs))
1574 if (AM.HasBaseReg) {
1605 return isLegalMUBUFAddressingMode(AM);
1612 if (Ty->isSized() &&
DL.getTypeStoreSize(Ty) < 4)
1662 : isLegalMUBUFAddressingMode(AM);
1709 unsigned Size,
unsigned AddrSpace,
Align Alignment,
1721 Align RequiredAlignment(
1724 Alignment < RequiredAlignment)
1745 RequiredAlignment =
Align(4);
1763 *IsFast = (Alignment >= RequiredAlignment) ? 64
1764 : (Alignment <
Align(4)) ? 32
1786 *IsFast = (Alignment >= RequiredAlignment) ? 96
1787 : (Alignment <
Align(4)) ? 32
1800 RequiredAlignment =
Align(8);
1811 *IsFast = (Alignment >= RequiredAlignment) ? 128
1812 : (Alignment <
Align(4)) ? 32
1829 *IsFast = (Alignment >= RequiredAlignment) ?
Size : 0;
1831 return Alignment >= RequiredAlignment ||
1836 bool AlignedBy4 = Alignment >=
Align(4);
1838 *IsFast = AlignedBy4;
1840 return AlignedBy4 ||
1850 bool AlignedBy4 = Alignment >=
Align(4);
1852 *IsFast = AlignedBy4;
1863 return Alignment >=
Align(4) ||
1877 return Size >= 32 && Alignment >=
Align(4);
1882 unsigned *IsFast)
const {
1884 Alignment, Flags, IsFast);
1894 if (
Op.size() >= 16 &&
1898 if (
Op.size() >= 8 &&
Op.isDstAligned(
Align(4)))
1916 unsigned DestAS)
const {
1924 return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
1942 unsigned Index)
const {
1982 std::tie(InputPtrReg, RC, ArgTy) =
1992 MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
1998 const SDLoc &SL)
const {
2005 const SDLoc &SL)
const {
2008 std::optional<uint32_t> KnownSize =
2010 if (KnownSize.has_value())
2037 Val = getFPExtOrFPRound(DAG, Val, SL, VT);
2046SDValue SITargetLowering::lowerKernargMemParameter(
2058 int64_t OffsetDiff =
Offset - AlignDownOffset;
2064 SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
2073 ArgVal = DAG.
getNode(ISD::BITCAST, SL, MemVT, ArgVal);
2074 ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal,
Signed, Arg);
2085 SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load,
Signed, Arg);
2132 ExtType, SL, VA.
getLocVT(), Chain, FIN,
2161 Reg = &WorkGroupIDX;
2162 RC = &AMDGPU::SReg_32RegClass;
2166 Reg = &WorkGroupIDY;
2167 RC = &AMDGPU::SReg_32RegClass;
2171 Reg = &WorkGroupIDZ;
2172 RC = &AMDGPU::SReg_32RegClass;
2199 CallingConv::ID CallConv,
2203 for (
unsigned I = 0, E = Ins.size(), PSInputNum = 0;
I != E; ++
I) {
2207 "vector type argument should have been split");
2212 bool SkipArg = !Arg->
Used && !Info->isPSInputAllocated(PSInputNum);
2221 "unexpected vector split in ps argument type");
2235 Info->markPSInputAllocated(PSInputNum);
2237 Info->markPSInputEnabled(PSInputNum);
2254 if (Info.hasWorkItemIDX()) {
2260 Info.hasWorkItemIDY()) ? 0x3ff : ~0u;
2264 if (Info.hasWorkItemIDY()) {
2265 assert(Info.hasWorkItemIDX());
2270 unsigned Reg = AMDGPU::VGPR1;
2278 if (Info.hasWorkItemIDZ()) {
2279 assert(Info.hasWorkItemIDX() && Info.hasWorkItemIDY());
2284 unsigned Reg = AMDGPU::VGPR2;
2304 if (RegIdx == ArgVGPRs.
size()) {
2311 unsigned Reg = ArgVGPRs[RegIdx];
2313 assert(Reg != AMDGPU::NoRegister);
2323 unsigned NumArgRegs) {
2326 if (RegIdx == ArgSGPRs.
size())
2329 unsigned Reg = ArgSGPRs[RegIdx];
2331 assert(Reg != AMDGPU::NoRegister);
2345 assert(Reg != AMDGPU::NoRegister);
2371 const unsigned Mask = 0x3ff;
2374 if (Info.hasWorkItemIDX()) {
2376 Info.setWorkItemIDX(Arg);
2379 if (Info.hasWorkItemIDY()) {
2381 Info.setWorkItemIDY(Arg);
2384 if (Info.hasWorkItemIDZ())
2396 const unsigned Mask = 0x3ff;
2407 auto &
ArgInfo = Info.getArgInfo();
2421 if (Info.hasImplicitArgPtr())
2429 if (Info.hasWorkGroupIDX())
2432 if (Info.hasWorkGroupIDY())
2435 if (Info.hasWorkGroupIDZ())
2438 if (Info.hasLDSKernelId())
2449 Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(
TRI);
2450 MF.
addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
2456 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(
TRI);
2457 MF.
addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
2462 Register DispatchPtrReg = Info.addDispatchPtr(
TRI);
2463 MF.
addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
2471 MF.
addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
2477 Register InputPtrReg = Info.addKernargSegmentPtr(
TRI);
2486 MF.
addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
2491 Register FlatScratchInitReg = Info.addFlatScratchInit(
TRI);
2492 MF.
addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
2497 Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(
TRI);
2498 MF.
addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
2513 unsigned LastExplicitArgOffset =
2516 bool InPreloadSequence =
true;
2518 for (
auto &Arg :
F.args()) {
2519 if (!InPreloadSequence || !Arg.hasInRegAttr())
2522 int ArgIdx = Arg.getArgNo();
2525 if (InIdx < Ins.size() && (!Ins[InIdx].isOrigArg() ||
2526 (
int)Ins[InIdx].getOrigArgIndex() != ArgIdx))
2529 for (; InIdx < Ins.size() && Ins[InIdx].isOrigArg() &&
2530 (
int)Ins[InIdx].getOrigArgIndex() == ArgIdx;
2532 assert(ArgLocs[ArgIdx].isMemLoc());
2533 auto &ArgLoc = ArgLocs[InIdx];
2535 unsigned ArgOffset = ArgLoc.getLocMemOffset();
2537 unsigned NumAllocSGPRs =
2538 alignTo(ArgLoc.getLocVT().getFixedSizeInBits(), 32) / 32;
2541 if (ArgLoc.getLocVT().getStoreSize() < 4 && Alignment < 4) {
2542 Info.getArgInfo().PreloadKernArgs[InIdx].Regs.push_back(
2543 Info.getArgInfo().PreloadKernArgs[InIdx - 1].Regs[0]);
2547 unsigned Padding = ArgOffset - LastExplicitArgOffset;
2548 unsigned PaddingSGPRs =
alignTo(Padding, 4) / 4;
2550 if (PaddingSGPRs + NumAllocSGPRs + 1 >
2552 InPreloadSequence =
false;
2558 TRI.getSGPRClassForBitWidth(NumAllocSGPRs * 32);
2560 Info.addPreloadedKernArg(
TRI, RC, NumAllocSGPRs, InIdx, PaddingSGPRs);
2562 if (PreloadRegs->
size() > 1)
2563 RC = &AMDGPU::SGPR_32RegClass;
2564 for (
auto &Reg : *PreloadRegs) {
2570 LastExplicitArgOffset = NumAllocSGPRs * 4 + ArgOffset;
2579 if (Info.hasLDSKernelId()) {
2580 Register Reg = Info.addLDSKernelId();
2581 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2590 CallingConv::ID CallConv,
2591 bool IsShader)
const {
2599 assert(!HasArchitectedSGPRs &&
"Unhandled feature for the subtarget");
2601 unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
2605 unsigned NumRequiredSystemSGPRs = Info.hasWorkGroupIDX() +
2606 Info.hasWorkGroupIDY() +
2607 Info.hasWorkGroupIDZ() +
2608 Info.hasWorkGroupInfo();
2609 for (
unsigned i = NumRequiredSystemSGPRs + CurrentUserSGPRs; i < 16; ++i) {
2610 Register Reg = Info.addReservedUserSGPR();
2611 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2616 if (!HasArchitectedSGPRs) {
2617 if (Info.hasWorkGroupIDX()) {
2618 Register Reg = Info.addWorkGroupIDX();
2619 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2623 if (Info.hasWorkGroupIDY()) {
2624 Register Reg = Info.addWorkGroupIDY();
2625 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2629 if (Info.hasWorkGroupIDZ()) {
2630 Register Reg = Info.addWorkGroupIDZ();
2631 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2636 if (Info.hasWorkGroupInfo()) {
2637 Register Reg = Info.addWorkGroupInfo();
2638 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2642 if (Info.hasPrivateSegmentWaveByteOffset()) {
2644 unsigned PrivateSegmentWaveByteOffsetReg;
2647 PrivateSegmentWaveByteOffsetReg =
2648 Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
2652 if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
2654 Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
2657 PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
2659 MF.
addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
2660 CCInfo.
AllocateReg(PrivateSegmentWaveByteOffsetReg);
2664 Info.getNumPreloadedSGPRs() >= 16);
2679 if (HasStackObjects)
2680 Info.setHasNonSpillStackObjects(
true);
2685 HasStackObjects =
true;
2689 bool RequiresStackAccess = HasStackObjects || MFI.
hasCalls();
2691 if (!ST.enableFlatScratch()) {
2692 if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.
getFunction())) {
2699 Info.setScratchRSrcReg(PrivateSegmentBufferReg);
2701 unsigned ReservedBufferReg =
TRI.reservedPrivateSegmentBufferReg(MF);
2711 Info.setScratchRSrcReg(ReservedBufferReg);
2730 if (!
MRI.isLiveIn(AMDGPU::SGPR32)) {
2731 Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
2738 for (
unsigned Reg : AMDGPU::SGPR_32RegClass) {
2739 if (!
MRI.isLiveIn(Reg)) {
2740 Info.setStackPtrOffsetReg(Reg);
2745 if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
2752 if (ST.getFrameLowering()->hasFP(MF)) {
2753 Info.setFrameOffsetReg(AMDGPU::SGPR33);
2771 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
2780 if (AMDGPU::SReg_64RegClass.
contains(*
I))
2781 RC = &AMDGPU::SGPR_64RegClass;
2782 else if (AMDGPU::SReg_32RegClass.
contains(*
I))
2783 RC = &AMDGPU::SGPR_32RegClass;
2789 Entry->addLiveIn(*
I);
2794 for (
auto *Exit : Exits)
2796 TII->get(TargetOpcode::COPY), *
I)
2802 SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
2814 Fn,
"unsupported non-compute shaders with HSA",
DL.getDebugLoc());
2833 !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
2834 !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
2841 assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
2842 !Info->hasWorkGroupIDZ());
2861 if ((Info->getPSInputAddr() & 0x7F) == 0 ||
2862 ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
2865 Info->markPSInputAllocated(0);
2866 Info->markPSInputEnabled(0);
2877 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
2878 if ((PsInputBits & 0x7F) == 0 ||
2879 ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
2882 }
else if (IsKernel) {
2883 assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
2885 Splits.
append(Ins.begin(), Ins.end());
2898 }
else if (!IsGraphics) {
2923 for (
unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
2933 if (IsEntryFunc && VA.
isMemLoc()) {
2956 if (Arg.
isOrigArg() && Info->getArgInfo().PreloadKernArgs.count(i)) {
2960 int64_t OffsetDiff =
Offset - AlignDownOffset;
2967 Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs[0];
2977 ArgVal = DAG.
getNode(ISD::BITCAST,
DL, MemVT, ArgVal);
2978 NewArg = convertArgType(DAG, VT, MemVT,
DL, ArgVal,
2979 Ins[i].Flags.isSExt(), &Ins[i]);
2987 Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs;
2990 if (PreloadRegs.
size() == 1) {
2991 Register VReg =
MRI.getLiveInVirtReg(PreloadRegs[0]);
2996 TRI->getRegSizeInBits(*RC)));
3004 for (
auto Reg : PreloadRegs) {
3011 PreloadRegs.size()),
3028 NewArg = convertArgType(DAG, VT, MemVT,
DL, NewArg,
3029 Ins[i].Flags.isSExt(), &Ins[i]);
3034 lowerKernargMemParameter(DAG, VT, MemVT,
DL, Chain,
Offset,
3035 Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
3054 if (!IsEntryFunc && VA.
isMemLoc()) {
3055 SDValue Val = lowerStackParameter(DAG, VA,
DL, Chain, Arg);
3066 if (AMDGPU::VGPR_32RegClass.
contains(Reg))
3067 RC = &AMDGPU::VGPR_32RegClass;
3068 else if (AMDGPU::SGPR_32RegClass.
contains(Reg))
3069 RC = &AMDGPU::SGPR_32RegClass;
3096 Val = DAG.
getNode(ISD::BITCAST,
DL, ValVT, Val);
3130 Info->setBytesInStackArgArea(StackArgSize);
3132 return Chains.
empty() ? Chain :
3139 CallingConv::ID CallConv,
3150 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3156 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
3157 for (
unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i)
3158 if (CCInfo.
isAllocated(AMDGPU::VGPR_32RegClass.getRegister(i)))
3180 Info->setIfReturnsVoid(Outs.
empty());
3181 bool IsWaveEnd = Info->returnsVoid() && IsShader;
3199 for (
unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.
size();
I != E;
3200 ++
I, ++RealRVLocIdx) {
3204 SDValue Arg = OutVals[RealRVLocIdx];
3232 if (!Info->isEntryFunction()) {
3238 if (AMDGPU::SReg_64RegClass.
contains(*
I))
3240 else if (AMDGPU::SReg_32RegClass.
contains(*
I))
3256 return DAG.
getNode(Opc,
DL, MVT::Other, RetOps);
3260 SDValue Chain,
SDValue InGlue, CallingConv::ID CallConv,
bool IsVarArg,
3341 auto &ArgUsageInfo =
3343 CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
3373 std::tie(OutgoingArg, ArgRC, ArgTy) =
3381 std::tie(IncomingArg, IncomingArgRC, Ty) =
3383 assert(IncomingArgRC == ArgRC);
3386 EVT ArgVT =
TRI->getSpillSize(*ArgRC) == 8 ? MVT::i64 : MVT::i32;
3394 InputReg = getImplicitArgPtr(DAG,
DL);
3396 std::optional<uint32_t> Id =
3398 if (Id.has_value()) {
3410 RegsToPass.emplace_back(OutgoingArg->
getRegister(), InputReg);
3414 unsigned SpecialArgOffset =
3428 std::tie(OutgoingArg, ArgRC, Ty) =
3431 std::tie(OutgoingArg, ArgRC, Ty) =
3434 std::tie(OutgoingArg, ArgRC, Ty) =
3449 const bool NeedWorkItemIDX = !CLI.
CB->
hasFnAttr(
"amdgpu-no-workitem-id-x");
3450 const bool NeedWorkItemIDY = !CLI.
CB->
hasFnAttr(
"amdgpu-no-workitem-id-y");
3451 const bool NeedWorkItemIDZ = !CLI.
CB->
hasFnAttr(
"amdgpu-no-workitem-id-z");
3468 InputReg = InputReg.
getNode() ?
3477 InputReg = InputReg.
getNode() ?
3481 if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
3482 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
3492 IncomingArgX ? *IncomingArgX :
3493 IncomingArgY ? *IncomingArgY :
3494 *IncomingArgZ, ~0u);
3501 RegsToPass.emplace_back(OutgoingArg->
getRegister(), InputReg);
3530 SDValue Callee, CallingConv::ID CalleeCC,
bool IsVarArg,
3542 if (Callee->isDivergent())
3549 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
3553 if (!CallerPreserved)
3556 bool CCMatch = CallerCC == CalleeCC;
3569 if (Arg.hasByValAttr())
3583 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
3584 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3593 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, Ctx);
3621 CallingConv::ID CallConv = CLI.
CallConv;
3627 if (IsChainCallConv) {
3631 RequestedExec = CLI.
Args.back();
3632 assert(RequestedExec.
Node &&
"No node for EXEC");
3637 assert(CLI.
Outs.back().OrigArgIndex == 2 &&
"Unexpected last arg");
3638 CLI.
Outs.pop_back();
3642 assert(CLI.
Outs.back().OrigArgIndex == 2 &&
"Exec wasn't split up");
3643 CLI.
Outs.pop_back();
3648 "Haven't popped all the pieces of the EXEC mask");
3659 bool IsSibCall =
false;
3673 "unsupported call to variadic function ");
3681 "unsupported required tail call to function ");
3686 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3690 "site marked musttail or on llvm.amdgcn.cs.chain");
3697 if (!TailCallOpt && IsTailCall)
3742 if (!IsSibCall || IsChainCallConv) {
3749 RegsToPass.emplace_back(IsChainCallConv
3750 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
3751 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
3758 MVT PtrVT = MVT::i32;
3761 for (
unsigned i = 0, e = ArgLocs.
size(); i != e; ++i) {
3789 RegsToPass.push_back(std::pair(VA.
getLocReg(), Arg));
3797 int32_t
Offset = LocMemOffset;
3804 unsigned OpSize = Flags.isByVal() ?
3810 ? Flags.getNonZeroByValAlign()
3837 if (Outs[i].Flags.isByVal()) {
3839 DAG.
getConstant(Outs[i].Flags.getByValSize(),
DL, MVT::i32);
3842 Outs[i].Flags.getNonZeroByValAlign(),
3844 nullptr, std::nullopt, DstInfo,
3850 DAG.
getStore(Chain,
DL, Arg, DstAddr, DstInfo, Alignment);
3856 if (!MemOpChains.
empty())
3862 for (
auto &RegToPass : RegsToPass) {
3864 RegToPass.second, InGlue);
3873 if (IsTailCall && !IsSibCall) {
3878 std::vector<SDValue> Ops;
3879 Ops.push_back(Chain);
3880 Ops.push_back(Callee);
3897 if (IsChainCallConv)
3898 Ops.push_back(RequestedExec.
Node);
3902 for (
auto &RegToPass : RegsToPass) {
3904 RegToPass.second.getValueType()));
3909 const uint32_t *Mask =
TRI->getCallPreservedMask(MF, CallConv);
3910 assert(Mask &&
"Missing call preserved mask for calling convention");
3920 MVT::Glue, GlueOps),
3925 Ops.push_back(InGlue);
3944 return DAG.
getNode(OPC,
DL, NodeTys, Ops);
3949 Chain =
Call.getValue(0);
3950 InGlue =
Call.getValue(1);
3952 uint64_t CalleePopBytes = NumBytes;
3971 EVT VT =
Op.getValueType();
3977 Register SPReg = Info->getStackPtrOffsetReg();
3997 Tmp1 = DAG.
getNode(Opc, dl, VT, SP, ScaledSize);
3998 if (Alignment && *Alignment > StackAlign) {
4026 if (
Op.getValueType() != MVT::i32)
4045 assert(
Op.getValueType() == MVT::i32);
4054 Op.getOperand(0), IntrinID, GetRoundBothImm);
4088 SDValue RoundModeTimesNumBits =
4108 TableEntry, EnumOffset);
4124 static_cast<uint32_t>(ConstMode->getZExtValue()),
4136 if (UseReducedTable) {
4142 SDValue RoundModeTimesNumBits =
4162 SDValue RoundModeTimesNumBits =
4171 NewMode = TruncTable;
4180 ReadFirstLaneID, NewMode);
4193 IntrinID, RoundBothImm, NewMode);
4199 if (
Op->isDivergent())
4218 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
4219 EVT SrcVT = Src.getValueType();
4228 EVT DstVT =
Op.getValueType();
4232 return DAG.
getNode(ISD::BF16_TO_FP, SL, DstVT, BitCast);
4237 if (
Op.getValueType() != MVT::i64)
4251 Op.getOperand(0), IntrinID, ModeHwRegImm);
4253 Op.getOperand(0), IntrinID, TrapHwRegImm);
4260 SDValue Result = DAG.
getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
4267 if (
Op.getOperand(1).getValueType() != MVT::i64)
4270 SDValue Input = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i32,
Op.getOperand(1));
4279 ReadFirstLaneID, NewModeReg);
4281 ReadFirstLaneID, NewTrapReg);
4283 unsigned ModeHwReg =
4286 unsigned TrapHwReg =
4294 IntrinID, ModeHwRegImm, NewModeReg);
4297 IntrinID, TrapHwRegImm, NewTrapReg);
4304 .
Case(
"m0", AMDGPU::M0)
4305 .
Case(
"exec", AMDGPU::EXEC)
4306 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
4307 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
4308 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
4309 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
4310 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
4313 if (Reg == AMDGPU::NoRegister) {
4327 case AMDGPU::EXEC_LO:
4328 case AMDGPU::EXEC_HI:
4329 case AMDGPU::FLAT_SCR_LO:
4330 case AMDGPU::FLAT_SCR_HI:
4335 case AMDGPU::FLAT_SCR:
4354 MI.setDesc(
TII->getKillTerminatorFromPseudo(
MI.getOpcode()));
4363static std::pair<MachineBasicBlock *, MachineBasicBlock *>
4385 auto Next = std::next(
I);
4398 return std::pair(LoopBB, RemainderBB);
4405 auto I =
MI.getIterator();
4406 auto E = std::next(
I);
4428 Src->setIsKill(
false);
4444 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4447 BuildMI(*LoopBB,
I,
DL,
TII->get(AMDGPU::S_GETREG_B32), Reg)
4469 unsigned InitReg,
unsigned ResultReg,
unsigned PhiReg,
4470 unsigned InitSaveExecReg,
int Offset,
bool UseGPRIdxMode,
4479 Register PhiExec =
MRI.createVirtualRegister(BoolRC);
4480 Register NewExec =
MRI.createVirtualRegister(BoolRC);
4481 Register CurrentIdxReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
4482 Register CondReg =
MRI.createVirtualRegister(BoolRC);
4490 BuildMI(LoopBB,
I,
DL,
TII->get(TargetOpcode::PHI), PhiExec)
4497 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
4501 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
4506 BuildMI(LoopBB,
I,
DL,
TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
4507 : AMDGPU::S_AND_SAVEEXEC_B64),
4511 MRI.setSimpleHint(NewExec, CondReg);
4513 if (UseGPRIdxMode) {
4515 SGPRIdxReg = CurrentIdxReg;
4517 SGPRIdxReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
4518 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::S_ADD_I32), SGPRIdxReg)
4525 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4528 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
4535 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4537 BuildMI(LoopBB,
I,
DL,
TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
4538 : AMDGPU::S_XOR_B64_term), Exec)
4559 unsigned InitResultReg,
unsigned PhiReg,
int Offset,
4560 bool UseGPRIdxMode,
Register &SGPRIdxReg) {
4568 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
4570 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
4571 Register TmpExec =
MRI.createVirtualRegister(BoolXExecRC);
4572 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4573 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4588 InitResultReg, DstReg, PhiReg, TmpExec,
4589 Offset, UseGPRIdxMode, SGPRIdxReg);
4606static std::pair<unsigned, int>
4611 int NumElts =
TRI.getRegSizeInBits(*SuperRC) / 32;
4616 return std::pair(AMDGPU::sub0,
Offset);
4653 Register Tmp =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4670 Register SrcReg =
TII->getNamedOperand(
MI, AMDGPU::OpName::src)->getReg();
4671 int Offset =
TII->getNamedOperand(
MI, AMDGPU::OpName::offset)->getImm();
4680 const bool UseGPRIdxMode = ST.useVGPRIndexMode();
4683 if (
TII->getRegisterInfo().isSGPRClass(IdxRC)) {
4687 if (UseGPRIdxMode) {
4694 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
true);
4707 MI.eraseFromParent();
4716 Register PhiReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4717 Register InitReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4723 UseGPRIdxMode, SGPRIdxReg);
4727 if (UseGPRIdxMode) {
4729 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
true);
4731 BuildMI(*LoopBB, InsPt,
DL, GPRIDXDesc, Dst)
4736 BuildMI(*LoopBB, InsPt,
DL,
TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
4741 MI.eraseFromParent();
4758 int Offset =
TII->getNamedOperand(
MI, AMDGPU::OpName::offset)->getImm();
4769 const bool UseGPRIdxMode = ST.useVGPRIndexMode();
4771 if (Idx->
getReg() == AMDGPU::NoRegister) {
4782 MI.eraseFromParent();
4787 if (
TII->getRegisterInfo().isSGPRClass(IdxRC)) {
4791 if (UseGPRIdxMode) {
4795 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
false);
4804 const MCInstrDesc &MovRelDesc =
TII->getIndirectRegWriteMovRelPseudo(
4805 TRI.getRegSizeInBits(*VecRC), 32,
false);
4811 MI.eraseFromParent();
4821 Register PhiReg =
MRI.createVirtualRegister(VecRC);
4825 UseGPRIdxMode, SGPRIdxReg);
4828 if (UseGPRIdxMode) {
4830 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
false);
4832 BuildMI(*LoopBB, InsPt,
DL, GPRIDXDesc, Dst)
4838 const MCInstrDesc &MovRelDesc =
TII->getIndirectRegWriteMovRelPseudo(
4839 TRI.getRegSizeInBits(*VecRC), 32,
false);
4840 BuildMI(*LoopBB, InsPt,
DL, MovRelDesc, Dst)
4846 MI.eraseFromParent();
4861 bool isSGPR =
TRI->isSGPRClass(
MRI.getRegClass(SrcReg));
4889 Register LoopIterator =
MRI.createVirtualRegister(WaveMaskRegClass);
4890 Register InitalValReg =
MRI.createVirtualRegister(DstRegClass);
4892 Register AccumulatorReg =
MRI.createVirtualRegister(DstRegClass);
4893 Register ActiveBitsReg =
MRI.createVirtualRegister(WaveMaskRegClass);
4894 Register NewActiveBitsReg =
MRI.createVirtualRegister(WaveMaskRegClass);
4896 Register FF1Reg =
MRI.createVirtualRegister(DstRegClass);
4897 Register LaneValueReg =
MRI.createVirtualRegister(DstRegClass);
4899 bool IsWave32 = ST.isWave32();
4900 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4901 unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4906 (Opc == AMDGPU::S_MIN_U32) ? std::numeric_limits<uint32_t>::max() : 0;
4909 BuildMI(BB,
I,
DL,
TII->get(AMDGPU::S_MOV_B32), InitalValReg)
4914 I = ComputeLoop->end();
4916 BuildMI(*ComputeLoop,
I,
DL,
TII->get(AMDGPU::PHI), AccumulatorReg)
4920 BuildMI(*ComputeLoop,
I,
DL,
TII->get(AMDGPU::PHI), ActiveBitsReg)
4921 .
addReg(TmpSReg->getOperand(0).getReg())
4925 unsigned SFFOpc = IsWave32 ? AMDGPU::S_FF1_I32_B32 : AMDGPU::S_FF1_I32_B64;
4926 auto FF1 =
BuildMI(*ComputeLoop,
I,
DL,
TII->get(SFFOpc), FF1Reg)
4927 .
addReg(ActiveBits->getOperand(0).getReg());
4928 auto LaneValue =
BuildMI(*ComputeLoop,
I,
DL,
4929 TII->get(AMDGPU::V_READLANE_B32), LaneValueReg)
4931 .
addReg(FF1->getOperand(0).getReg());
4932 auto NewAccumulator =
BuildMI(*ComputeLoop,
I,
DL,
TII->get(Opc), DstReg)
4934 .
addReg(LaneValue->getOperand(0).getReg());
4937 unsigned BITSETOpc =
4938 IsWave32 ? AMDGPU::S_BITSET0_B32 : AMDGPU::S_BITSET0_B64;
4939 auto NewActiveBits =
4940 BuildMI(*ComputeLoop,
I,
DL,
TII->get(BITSETOpc), NewActiveBitsReg)
4941 .
addReg(FF1->getOperand(0).getReg())
4942 .
addReg(ActiveBits->getOperand(0).getReg());
4947 ActiveBits.
addReg(NewActiveBits->getOperand(0).getReg())
4951 unsigned CMPOpc = IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64;
4953 .
addReg(NewActiveBits->getOperand(0).getReg())
4955 BuildMI(*ComputeLoop,
I,
DL,
TII->get(AMDGPU::S_CBRANCH_SCC1))
4960 MI.eraseFromParent();
4971 switch (
MI.getOpcode()) {
4972 case AMDGPU::WAVE_REDUCE_UMIN_PSEUDO_U32:
4974 case AMDGPU::WAVE_REDUCE_UMAX_PSEUDO_U32:
4976 case AMDGPU::S_UADDO_PSEUDO:
4977 case AMDGPU::S_USUBO_PSEUDO: {
4984 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
4986 : AMDGPU::S_SUB_I32;
4993 MI.eraseFromParent();
4996 case AMDGPU::S_ADD_U64_PSEUDO:
4997 case AMDGPU::S_SUB_U64_PSEUDO: {
5006 bool IsAdd = (
MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
5008 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U64 : AMDGPU::S_SUB_U64;
5016 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5017 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5020 MI,
MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
5022 MI,
MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
5025 MI,
MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
5027 MI,
MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
5029 unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
5030 unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
5043 MI.eraseFromParent();
5046 case AMDGPU::V_ADD_U64_PSEUDO:
5047 case AMDGPU::V_SUB_U64_PSEUDO: {
5053 bool IsAdd = (
MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
5059 if (IsAdd && ST.hasLshlAddB64()) {
5065 TII->legalizeOperands(*
Add);
5066 MI.eraseFromParent();
5070 const auto *CarryRC =
TRI->getWaveMaskRegClass();
5072 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5073 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5075 Register CarryReg =
MRI.createVirtualRegister(CarryRC);
5076 Register DeadCarryReg =
MRI.createVirtualRegister(CarryRC);
5080 : &AMDGPU::VReg_64RegClass;
5083 : &AMDGPU::VReg_64RegClass;
5086 TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0);
5088 TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
5091 MI,
MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
5093 MI,
MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
5096 MI,
MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
5098 MI,
MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
5100 unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
5107 unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
5121 TII->legalizeOperands(*LoHalf);
5122 TII->legalizeOperands(*HiHalf);
5123 MI.eraseFromParent();
5126 case AMDGPU::S_ADD_CO_PSEUDO:
5127 case AMDGPU::S_SUB_CO_PSEUDO: {
5141 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
5142 ? AMDGPU::S_ADDC_U32
5143 : AMDGPU::S_SUBB_U32;
5145 Register RegOp0 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5146 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
5151 Register RegOp1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5152 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1)
5156 Register RegOp2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5158 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2)
5164 unsigned WaveSize =
TRI->getRegSizeInBits(*Src2RC);
5165 assert(WaveSize == 64 || WaveSize == 32);
5167 if (WaveSize == 64) {
5168 if (ST.hasScalarCompareEq64()) {
5174 TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
5176 MII,
MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
5178 MII,
MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
5179 Register Src2_32 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5181 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::S_OR_B32), Src2_32)
5198 (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
5204 MI.eraseFromParent();
5207 case AMDGPU::SI_INIT_M0: {
5209 TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
5210 .
add(
MI.getOperand(0));
5211 MI.eraseFromParent();
5214 case AMDGPU::GET_GROUPSTATICSIZE: {
5219 .
add(
MI.getOperand(0))
5221 MI.eraseFromParent();
5224 case AMDGPU::GET_SHADERCYCLESHILO: {
5238 using namespace AMDGPU::Hwreg;
5239 Register RegHi1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5241 .
addImm(HwregEncoding::encode(ID_SHADER_CYCLES_HI, 0, 32));
5242 Register RegLo1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5244 .
addImm(HwregEncoding::encode(ID_SHADER_CYCLES, 0, 32));
5245 Register RegHi2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5247 .
addImm(HwregEncoding::encode(ID_SHADER_CYCLES_HI, 0, 32));
5251 Register RegLo =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5256 .
add(
MI.getOperand(0))
5261 MI.eraseFromParent();
5264 case AMDGPU::SI_INDIRECT_SRC_V1:
5265 case AMDGPU::SI_INDIRECT_SRC_V2:
5266 case AMDGPU::SI_INDIRECT_SRC_V4:
5267 case AMDGPU::SI_INDIRECT_SRC_V8:
5268 case AMDGPU::SI_INDIRECT_SRC_V9:
5269 case AMDGPU::SI_INDIRECT_SRC_V10:
5270 case AMDGPU::SI_INDIRECT_SRC_V11:
5271 case AMDGPU::SI_INDIRECT_SRC_V12:
5272 case AMDGPU::SI_INDIRECT_SRC_V16:
5273 case AMDGPU::SI_INDIRECT_SRC_V32:
5275 case AMDGPU::SI_INDIRECT_DST_V1:
5276 case AMDGPU::SI_INDIRECT_DST_V2:
5277 case AMDGPU::SI_INDIRECT_DST_V4:
5278 case AMDGPU::SI_INDIRECT_DST_V8:
5279 case AMDGPU::SI_INDIRECT_DST_V9:
5280 case AMDGPU::SI_INDIRECT_DST_V10:
5281 case AMDGPU::SI_INDIRECT_DST_V11:
5282 case AMDGPU::SI_INDIRECT_DST_V12:
5283 case AMDGPU::SI_INDIRECT_DST_V16:
5284 case AMDGPU::SI_INDIRECT_DST_V32:
5286 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
5287 case AMDGPU::SI_KILL_I1_PSEUDO:
5289 case AMDGPU::V_CNDMASK_B64_PSEUDO: {
5298 Register SrcCond =
MI.getOperand(3).getReg();
5300 Register DstLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5301 Register DstHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5302 const auto *CondRC =
TRI->getWaveMaskRegClass();
5303 Register SrcCondCopy =
MRI.createVirtualRegister(CondRC);
5307 : &AMDGPU::VReg_64RegClass;
5310 : &AMDGPU::VReg_64RegClass;
5313 TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0);
5315 TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
5318 MI,
MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
5320 MI,
MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
5323 MI,
MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
5325 MI,
MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
5347 MI.eraseFromParent();
5350 case AMDGPU::SI_BR_UNDEF: {
5354 .
add(
MI.getOperand(0));
5356 MI.eraseFromParent();
5359 case AMDGPU::ADJCALLSTACKUP:
5360 case AMDGPU::ADJCALLSTACKDOWN: {
5367 case AMDGPU::SI_CALL_ISEL: {
5371 unsigned ReturnAddrReg =
TII->getRegisterInfo().getReturnAddressReg(*MF);
5374 MIB =
BuildMI(*BB,
MI,
DL,
TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
5380 MI.eraseFromParent();
5383 case AMDGPU::V_ADD_CO_U32_e32:
5384 case AMDGPU::V_SUB_CO_U32_e32:
5385 case AMDGPU::V_SUBREV_CO_U32_e32: {
5388 unsigned Opc =
MI.getOpcode();
5390 bool NeedClampOperand =
false;
5391 if (
TII->pseudoToMCOpcode(Opc) == -1) {
5393 NeedClampOperand =
true;
5397 if (
TII->isVOP3(*
I)) {
5402 I.add(
MI.getOperand(1))
5403 .add(
MI.getOperand(2));
5404 if (NeedClampOperand)
5407 TII->legalizeOperands(*
I);
5409 MI.eraseFromParent();
5412 case AMDGPU::V_ADDC_U32_e32:
5413 case AMDGPU::V_SUBB_U32_e32:
5414 case AMDGPU::V_SUBBREV_U32_e32:
5417 TII->legalizeOperands(
MI);
5419 case AMDGPU::DS_GWS_INIT:
5420 case AMDGPU::DS_GWS_SEMA_BR:
5421 case AMDGPU::DS_GWS_BARRIER:
5422 TII->enforceOperandRCAlignment(
MI, AMDGPU::OpName::data0);
5424 case AMDGPU::DS_GWS_SEMA_V:
5425 case AMDGPU::DS_GWS_SEMA_P:
5426 case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
5434 case AMDGPU::S_SETREG_B32: {
5444 auto [ID,
Offset, Width] =
5450 const unsigned SetMask = WidthMask <<
Offset;
5453 unsigned SetDenormOp = 0;
5454 unsigned SetRoundOp = 0;
5462 SetRoundOp = AMDGPU::S_ROUND_MODE;
5463 SetDenormOp = AMDGPU::S_DENORM_MODE;
5465 SetRoundOp = AMDGPU::S_ROUND_MODE;
5467 SetDenormOp = AMDGPU::S_DENORM_MODE;
5470 if (SetRoundOp || SetDenormOp) {
5473 if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
5474 unsigned ImmVal = Def->getOperand(1).getImm();
5488 MI.eraseFromParent();
5497 MI.setDesc(
TII->get(AMDGPU::S_SETREG_B32_mode));
5501 case AMDGPU::S_INVERSE_BALLOT_U32:
5502 case AMDGPU::S_INVERSE_BALLOT_U64:
5505 MI.setDesc(
TII->get(AMDGPU::COPY));
5507 case AMDGPU::ENDPGM_TRAP: {
5510 MI.setDesc(
TII->get(AMDGPU::S_ENDPGM));
5528 MI.eraseFromParent();
5531 case AMDGPU::SIMULATED_TRAP: {
5535 TII->insertSimulatedTrap(
MRI, *BB,
MI,
MI.getDebugLoc());
5536 MI.eraseFromParent();
5573 return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
5577 return (Ty.getScalarSizeInBits() <= 16 && Subtarget->
has16BitInsts())
5629 switch (Ty.getScalarSizeInBits()) {
5647 if (Ty.getScalarSizeInBits() == 16)
5649 if (Ty.getScalarSizeInBits() == 32)
5660 EVT VT =
N->getValueType(0);
5664 if (VT == MVT::f16) {
5680 unsigned Opc =
Op.getOpcode();
5681 EVT VT =
Op.getValueType();
5682 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
5683 VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
5684 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5685 VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
5703 unsigned Opc =
Op.getOpcode();
5704 EVT VT =
Op.getValueType();
5705 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
5706 VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
5707 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5708 VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
5727 unsigned Opc =
Op.getOpcode();
5728 EVT VT =
Op.getValueType();
5729 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
5730 VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
5731 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5732 VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16 ||
5733 VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v16bf16 ||
5734 VT == MVT::v32bf16);
5740 : std::pair(Op0, Op0);
5759 switch (
Op.getOpcode()) {
5761 case ISD::BRCOND:
return LowerBRCOND(
Op, DAG);
5765 assert((!Result.getNode() ||
5766 Result.getNode()->getNumValues() == 2) &&
5767 "Load should return a value and a chain");
5771 EVT VT =
Op.getValueType();
5773 return lowerFSQRTF32(
Op, DAG);
5775 return lowerFSQRTF64(
Op, DAG);
5780 return LowerTrig(
Op, DAG);
5783 case ISD::FFREXP:
return LowerFFREXP(
Op, DAG);
5784 case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(
Op, DAG);
5785 case ISD::STORE:
return LowerSTORE(
Op, DAG);
5789 return LowerGlobalAddress(MFI,
Op, DAG);
5794 case ISD::ADDRSPACECAST:
return lowerADDRSPACECAST(
Op, DAG);
5796 return lowerINSERT_SUBVECTOR(
Op, DAG);
5798 return lowerINSERT_VECTOR_ELT(
Op, DAG);
5800 return lowerEXTRACT_VECTOR_ELT(
Op, DAG);
5802 return lowerVECTOR_SHUFFLE(
Op, DAG);
5804 return lowerSCALAR_TO_VECTOR(
Op, DAG);
5806 return lowerBUILD_VECTOR(
Op, DAG);
5809 return lowerFP_ROUND(
Op, DAG);
5811 return lowerTRAP(
Op, DAG);
5812 case ISD::DEBUGTRAP:
5813 return lowerDEBUGTRAP(
Op, DAG);
5822 return lowerFMINNUM_FMAXNUM(
Op, DAG);
5825 return lowerFLDEXP(
Op, DAG);
5842 case ISD::FMINNUM_IEEE:
5843 case ISD::FMAXNUM_IEEE:
5846 case ISD::FMINIMUMNUM:
5847 case ISD::FMAXIMUMNUM:
5854 return lowerMUL(
Op, DAG);
5857 return lowerXMULO(
Op, DAG);
5860 return lowerXMUL_LOHI(
Op, DAG);
5861 case ISD::DYNAMIC_STACKALLOC:
5863 case ISD::STACKSAVE:
5867 case ISD::SET_ROUNDING:
5871 case ISD::FP_EXTEND:
5874 case ISD::GET_FPENV:
5876 case ISD::SET_FPENV:
5893 EVT FittingLoadVT = LoadVT;
5918 return DAG.
getNode(ISD::BITCAST,
DL, FittingLoadVT, Result);
5922 return DAG.
getNode(ISD::BITCAST,
DL, FittingLoadVT, Result);
5925SDValue SITargetLowering::adjustLoadValueType(
unsigned Opcode,
5929 bool IsIntrinsic)
const {
5933 EVT LoadVT = M->getValueType(0);
5935 EVT EquivLoadVT = LoadVT;
5954 VTList, Ops, M->getMemoryVT(),
5955 M->getMemOperand());
5966 EVT LoadVT = M->getValueType(0);
5972 assert(M->getNumValues() == 2 || M->getNumValues() == 3);
5973 bool IsTFE = M->getNumValues() == 3;
5986 return handleByteShortBufferLoads(DAG, LoadVT,
DL, Ops, M->getMemOperand(),
5990 return getMemIntrinsicNode(Opc,
DL, M->getVTList(), Ops, IntVT,
5991 M->getMemOperand(), DAG);
5996 SDValue MemNode = getMemIntrinsicNode(Opc,
DL, VTList, Ops, CastVT,
5997 M->getMemOperand(), DAG);
6005 EVT VT =
N->getValueType(0);
6006 unsigned CondCode =
N->getConstantOperandVal(3);
6017 EVT CmpVT = LHS.getValueType();
6018 if (CmpVT == MVT::i16 && !TLI.
isTypeLegal(MVT::i16)) {
6021 LHS = DAG.
getNode(PromoteOp,
DL, MVT::i32, LHS);
6022 RHS = DAG.
getNode(PromoteOp,
DL, MVT::i32, RHS);
6039 EVT VT =
N->getValueType(0);
6041 unsigned CondCode =
N->getConstantOperandVal(3);
6050 if (CmpVT == MVT::f16 && !TLI.
isTypeLegal(CmpVT)) {
6051 Src0 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
6052 Src1 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
6068 EVT VT =
N->getValueType(0);
6075 Src.getOperand(1), Src.getOperand(2));
6086 Exec = AMDGPU::EXEC_LO;
6088 Exec = AMDGPU::EXEC;
6105 EVT VT =
N->getValueType(0);
6107 unsigned IID =
N->getConstantOperandVal(0);
6108 bool IsPermLane16 = IID == Intrinsic::amdgcn_permlane16 ||
6109 IID == Intrinsic::amdgcn_permlanex16;
6110 bool IsSetInactive = IID == Intrinsic::amdgcn_set_inactive ||
6111 IID == Intrinsic::amdgcn_set_inactive_chain_arg;
6119 case Intrinsic::amdgcn_permlane16:
6120 case Intrinsic::amdgcn_permlanex16:
6125 case Intrinsic::amdgcn_writelane:
6128 case Intrinsic::amdgcn_readlane:
6129 case Intrinsic::amdgcn_set_inactive:
6130 case Intrinsic::amdgcn_set_inactive_chain_arg:
6133 case Intrinsic::amdgcn_readfirstlane:
6134 case Intrinsic::amdgcn_permlane64:
6146 GL = GL->getOperand(0).getNode();
6156 if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
6157 IsSetInactive || IsPermLane16) {
6158 Src1 =
N->getOperand(2);
6159 if (IID == Intrinsic::amdgcn_writelane || IsPermLane16)
6160 Src2 =
N->getOperand(3);
6163 if (ValSize == 32) {
6173 if (IsSetInactive || IsPermLane16) {
6178 if (IID == Intrinsic::amdgcn_writelane) {
6183 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
6185 return IsFloat ? DAG.
getBitcast(VT, Trunc) : Trunc;
6188 if (ValSize % 32 != 0)
6192 EVT VT =
N->getValueType(0);
6196 unsigned NumOperands =
N->getNumOperands();
6203 for (
unsigned i = 0; i != NE; ++i) {
6204 for (
unsigned j = 0, e = GL ? NumOperands - 1 : NumOperands; j != e;
6237 return unrollLaneOp(LaneOp.
getNode());
6244 SDValue Src0SubVec, Src1SubVec, Src2SubVec;
6245 for (
unsigned i = 0, EltIdx = 0; i < ValSize / 32; i++) {
6249 if (IsSetInactive || IsPermLane16)
6253 if (IID == Intrinsic::amdgcn_writelane)
6258 IsSetInactive || IsPermLane16
6259 ? createLaneOp(Src0SubVec, Src1SubVec, Src2, SubVecVT)
6260 : createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
6274 if (IsSetInactive || IsPermLane16)
6277 if (IID == Intrinsic::amdgcn_writelane)
6280 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
6288 switch (
N->getOpcode()) {
6300 unsigned IID =
N->getConstantOperandVal(0);
6302 case Intrinsic::amdgcn_make_buffer_rsrc:
6303 Results.push_back(lowerPointerAsRsrcIntrin(
N, DAG));
6305 case Intrinsic::amdgcn_cvt_pkrtz: {
6311 Results.push_back(DAG.
getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
6314 case Intrinsic::amdgcn_cvt_pknorm_i16:
6315 case Intrinsic::amdgcn_cvt_pknorm_u16:
6316 case Intrinsic::amdgcn_cvt_pk_i16:
6317 case Intrinsic::amdgcn_cvt_pk_u16: {
6323 if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
6325 else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
6327 else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
6332 EVT VT =
N->getValueType(0);
6337 Results.push_back(DAG.
getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
6341 case Intrinsic::amdgcn_s_buffer_load: {
6353 EVT VT =
Op.getValueType();
6354 assert(VT == MVT::i8 &&
"Expected 8-bit s_buffer_load intrinsics.\n");
6366 if (!
Offset->isDivergent()) {
6385 LoadVal = handleByteShortBufferLoads(DAG, VT,
DL, Ops, MMO);
6397 for (
unsigned I = 0;
I < Res.getNumOperands();
I++) {
6398 Results.push_back(Res.getOperand(
I));
6402 Results.push_back(Res.getValue(1));
6411 EVT VT =
N->getValueType(0);
6416 EVT SelectVT = NewVT;
6417 if (NewVT.
bitsLT(MVT::i32)) {
6420 SelectVT = MVT::i32;
6424 N->getOperand(0), LHS, RHS);
6426 if (NewVT != SelectVT)
6432 if (
N->getValueType(0) != MVT::v2f16)
6436 SDValue BC = DAG.
getNode(ISD::BITCAST, SL, MVT::i32,
N->getOperand(0));
6445 if (
N->getValueType(0) != MVT::v2f16)
6449 SDValue BC = DAG.
getNode(ISD::BITCAST, SL, MVT::i32,
N->getOperand(0));
6458 if (
N->getValueType(0) != MVT::f16)
6476 if (
I.getUse().get() !=
Value)
6479 if (
I->getOpcode() == Opcode)
6485unsigned SITargetLowering::isCFIntrinsic(
const SDNode *
Intr)
const {
6487 switch (
Intr->getConstantOperandVal(1)) {
6488 case Intrinsic::amdgcn_if:
6490 case Intrinsic::amdgcn_else:
6492 case Intrinsic::amdgcn_loop:
6494 case Intrinsic::amdgcn_end_cf:
6542 SDNode *
Intr = BRCOND.getOperand(1).getNode();
6555 assert(BR &&
"brcond missing unconditional branch user");
6559 unsigned CFNode = isCFIntrinsic(
Intr);
6578 Ops.
append(
Intr->op_begin() + (HaveChain ? 2 : 1),
Intr->op_end());
6608 for (
unsigned i = 1, e =
Intr->getNumValues() - 1; i != e; ++i) {
6625 Intr->getOperand(0));
6632 MVT VT =
Op.getSimpleValueType();
6635 if (
Op.getConstantOperandVal(0) != 0)
6641 if (Info->isEntryFunction())
6659 return Op.getValueType().bitsLE(VT) ?
6666 assert(
Op.getValueType() == MVT::f16 &&
6667 "Do not know how to custom lower FP_ROUND for non-f16 type");
6670 EVT SrcVT = Src.getValueType();
6671 if (SrcVT != MVT::f64)
6682 return DAG.
getNode(ISD::BITCAST,
DL, MVT::f16, Trunc);
6687 EVT VT =
Op.getValueType();
6699 if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
6707 EVT VT =
Op.getValueType();
6711 EVT ExpVT =
Exp.getValueType();
6712 if (ExpVT == MVT::i16)
6733 {
Op.getOperand(0),
Op.getOperand(1), TruncExp});
6736 return DAG.
getNode(ISD::FLDEXP,
DL, VT,
Op.getOperand(0), TruncExp);
6740 switch (
Op->getOpcode()) {
6770 DAGCombinerInfo &DCI)
const {
6771 const unsigned Opc =
Op.getOpcode();
6779 :
Op->getOperand(0).getValueType();
6782 if (DCI.isBeforeLegalizeOps() ||
6786 auto &DAG = DCI.DAG;
6792 LHS =
Op->getOperand(1);
6793 RHS =
Op->getOperand(2);
6795 LHS =
Op->getOperand(0);
6796 RHS =
Op->getOperand(1);
6800 LHS = DAG.
getNode(ExtOp,
DL, ExtTy, {LHS});
6806 RHS = DAG.
getNode(ExtOp,
DL, ExtTy, {RHS});
6827 EVT VT =
Op.getValueType();
6833 assert(VT == MVT::i64 &&
"The following code is a special for s_mul_u64");
6860 if (
Op->isDivergent())
6873 if (Op0LeadingZeros >= 32 && Op1LeadingZeros >= 32)
6875 DAG.
getMachineNode(AMDGPU::S_MUL_U64_U32_PSEUDO, SL, VT, Op0, Op1), 0);
6878 if (Op0SignBits >= 33 && Op1SignBits >= 33)
6880 DAG.
getMachineNode(AMDGPU::S_MUL_I64_I32_PSEUDO, SL, VT, Op0, Op1), 0);
6886 EVT VT =
Op.getValueType();
6893 const APInt &
C = RHSC->getAPIntValue();
6895 if (
C.isPowerOf2()) {
6897 bool UseArithShift =
isSigned && !
C.isMinSignedValue();
6902 SL, VT, Result, ShiftAmt),
6922 if (
Op->isDivergent()) {
6939 return lowerTrapEndpgm(
Op, DAG);
6942 lowerTrapHsaQueuePtr(
Op, DAG);
6945SDValue SITargetLowering::lowerTrapEndpgm(
6953 const SDLoc &
DL,
Align Alignment, ImplicitParameter Param)
const {
6963SDValue SITargetLowering::lowerTrapHsaQueuePtr(
6973 loadImplicitKernelArgument(DAG, MVT::i64, SL,
Align(8),
QUEUE_PTR);
6977 Register UserSGPR = Info->getQueuePtrUserSGPR();
6979 if (UserSGPR == AMDGPU::NoRegister) {
7004SDValue SITargetLowering::lowerTrapHsa(
7030 "debugtrap handler not supported",
7046SDValue SITargetLowering::getSegmentAperture(
unsigned AS,
const SDLoc &
DL,
7050 ? AMDGPU::SRC_SHARED_BASE
7051 : AMDGPU::SRC_PRIVATE_BASE;
7074 {SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
7083 return loadImplicitKernelArgument(DAG, MVT::i32,
DL,
Align(4), Param);
7088 Register UserSGPR = Info->getQueuePtrUserSGPR();
7089 if (UserSGPR == AMDGPU::NoRegister) {
7096 DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
7124 return ConstVal->getSExtValue() != TM.getNullPointerValue(AddrSpace);
7138 unsigned DestAS, SrcAS;
7140 bool IsNonNull =
false;
7142 SrcAS = ASC->getSrcAddressSpace();
7143 Src = ASC->getOperand(0);
7144 DestAS = ASC->getDestAddressSpace();
7147 Op.getConstantOperandVal(0) ==
7148 Intrinsic::amdgcn_addrspacecast_nonnull);
7149 Src =
Op->getOperand(1);
7150 SrcAS =
Op->getConstantOperandVal(2);
7151 DestAS =
Op->getConstantOperandVal(3);
7166 unsigned NullVal =
TM.getNullPointerValue(DestAS);
7180 SDValue Aperture = getSegmentAperture(SrcAS, SL, DAG);
7183 CvtPtr = DAG.
getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
7188 unsigned NullVal =
TM.getNullPointerValue(SrcAS);
7200 Op.getValueType() == MVT::i64) {
7205 return DAG.
getNode(ISD::BITCAST, SL, MVT::i64, Vec);
7209 Src.getValueType() == MVT::i64)
7233 EVT InsVT =
Ins.getValueType();
7241 assert(InsNumElts % 2 == 0 &&
"expect legal vector types");
7246 EVT NewInsVT = InsNumElts == 2 ? MVT::i32
7248 MVT::i32, InsNumElts / 2);
7250 Vec = DAG.
getNode(ISD::BITCAST, SL, NewVecVT, Vec);
7251 Ins = DAG.
getNode(ISD::BITCAST, SL, NewInsVT, Ins);
7253 for (
unsigned I = 0;
I != InsNumElts / 2; ++
I) {
7255 if (InsNumElts == 2) {
7265 return DAG.
getNode(ISD::BITCAST, SL, VecVT, Vec);
7268 for (
unsigned I = 0;
I != InsNumElts; ++
I) {
7291 if (NumElts == 4 && EltSize == 16 && KIdx) {
7299 SDValue LoVec = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i16, LoHalf);
7300 SDValue HiVec = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i16, HiHalf);
7302 unsigned Idx = KIdx->getZExtValue();
7303 bool InsertLo = Idx < 2;
7305 InsertLo ? LoVec : HiVec,
7306 DAG.
getNode(ISD::BITCAST, SL, MVT::i16, InsVal),
7307 DAG.
getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
7309 InsHalf = DAG.
getNode(ISD::BITCAST, SL, MVT::i32, InsHalf);
7313 DAG.getBuildVector(
MVT::v2i32, SL, { LoHalf, InsHalf });
7326 assert(VecSize <= 64 &&
"Expected target vector size to be <= 64 bits");
7348 DAG.
getNOT(SL, BFM, IntVT), BCVec);
7353 return DAG.
getNode(ISD::BITCAST, SL, VecVT, BFI);
7360 EVT ResultVT =
Op.getValueType();
7373 if (
SDValue Combined = performExtractVectorEltCombine(
Op.getNode(), DCI))
7376 if (VecSize == 128 || VecSize == 256 || VecSize == 512) {
7381 if (VecSize == 128) {
7389 }
else if (VecSize == 256) {
7392 for (
unsigned P = 0;
P < 4; ++
P) {
7398 Parts[0], Parts[1]));
7400 Parts[2], Parts[3]));
7406 for (
unsigned P = 0;
P < 8; ++
P) {
7413 Parts[0], Parts[1], Parts[2], Parts[3]));
7416 Parts[4], Parts[5],Parts[6], Parts[7]));
7436 Src = DAG.
getBitcast(Src.getValueType().changeTypeToInteger(), Src);
7451 if (ResultVT == MVT::f16 || ResultVT == MVT::bf16) {
7453 return DAG.
getNode(ISD::BITCAST, SL, ResultVT, Result);
7461 return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0);
7467 EVT ResultVT =
Op.getValueType();
7470 EVT PackVT = ResultVT.
isInteger() ? MVT::v2i16 : MVT::v2f16;
7472 int SrcNumElts =
Op.getOperand(0).getValueType().getVectorNumElements();
7488 int VecIdx = Idx < SrcNumElts ? 0 : 1;
7489 int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts;
7497 int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
7498 int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
7499 int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts;
7500 int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
7519 EVT ResultVT =
Op.getValueType();
7535 EVT VT =
Op.getValueType();
7537 if (VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16) {
7547 return DAG.
getNode(ISD::BITCAST, SL, VT, ExtLo);
7556 return DAG.
getNode(ISD::BITCAST, SL, VT, ShlHi);
7562 return DAG.
getNode(ISD::BITCAST, SL, VT,
Or);
7571 for (
unsigned P = 0;
P < NumParts; ++
P) {
7573 PartVT, SL, {
Op.getOperand(
P * 2),
Op.getOperand(
P * 2 + 1)});
7579 return DAG.
getNode(ISD::BITCAST, SL, VT, Blend);
7644 EVT PtrVT =
Op.getValueType();
7660 assert(PtrVT == MVT::i32 &&
"32-bit pointer is expected.");
7733 SDValue Param = lowerKernargMemParameter(
7743 "non-hsa intrinsic with hsa target",
7752 "intrinsic not supported on subtarget",
7762 unsigned NumElts = Elts.
size();
7764 if (NumElts <= 12) {
7773 for (
unsigned i = 0; i < Elts.
size(); ++i) {
7779 for (
unsigned i = Elts.
size(); i < NumElts; ++i)
7780 VecElts[i] = DAG.
getUNDEF(MVT::f32);
7789 EVT SrcVT = Src.getValueType();
7810 bool Unpacked,
bool IsD16,
int DMaskPop,
7811 int NumVDataDwords,
bool IsAtomicPacked16Bit,
7814 EVT ReqRetVT = ResultTypes[0];
7816 int NumDataDwords = ((IsD16 && !Unpacked) || IsAtomicPacked16Bit)
7817 ? (ReqRetNumElts + 1) / 2
7820 int MaskPopDwords = (!IsD16 || Unpacked) ? DMaskPop : (DMaskPop + 1) / 2;
7822 MVT DataDwordVT = NumDataDwords == 1 ?
7825 MVT MaskPopVT = MaskPopDwords == 1 ?
7831 if (DMaskPop > 0 &&
Data.getValueType() != MaskPopVT) {
7842 if (DataDwordVT.
isVector() && !IsAtomicPacked16Bit)
7844 NumDataDwords - MaskPopDwords);
7849 EVT LegalReqRetVT = ReqRetVT;
7851 if (!
Data.getValueType().isInteger())
7853 Data.getValueType().changeTypeToInteger(),
Data);
7874 if (Result->getNumValues() == 1)
7881 SDValue *LWE,
bool &IsTexFail) {
7901 unsigned DimIdx,
unsigned EndIdx,
7902 unsigned NumGradients) {
7904 for (
unsigned I = DimIdx;
I < EndIdx;
I++) {
7912 if (((
I + 1) >= EndIdx) ||
7913 ((NumGradients / 2) % 2 == 1 && (
I == DimIdx + (NumGradients / 2) - 1 ||
7914 I == DimIdx + NumGradients - 1))) {
7915 if (
Addr.getValueType() != MVT::i16)
7936 unsigned IntrOpcode =
Intr->BaseOpcode;
7947 int NumVDataDwords = 0;
7948 bool AdjustRetType =
false;
7949 bool IsAtomicPacked16Bit =
false;
7952 const unsigned ArgOffset = WithChain ? 2 : 1;
7955 unsigned DMaskLanes = 0;
7957 if (BaseOpcode->Atomic) {
7958 VData =
Op.getOperand(2);
7960 IsAtomicPacked16Bit =
7961 (
Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
7962 Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
7965 if (BaseOpcode->AtomicX2) {
7972 ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
7973 DMask = Is64Bit ? 0xf : 0x3;
7974 NumVDataDwords = Is64Bit ? 4 : 2;
7976 DMask = Is64Bit ? 0x3 : 0x1;
7977 NumVDataDwords = Is64Bit ? 2 : 1;
7980 DMask =
Op->getConstantOperandVal(ArgOffset +
Intr->DMaskIndex);
7983 if (BaseOpcode->Store) {
7984 VData =
Op.getOperand(2);
7992 VData = handleD16VData(VData, DAG,
true);
7995 NumVDataDwords = (VData.
getValueType().getSizeInBits() + 31) / 32;
7996 }
else if (!BaseOpcode->NoReturn) {
8009 (!LoadVT.
isVector() && DMaskLanes > 1))
8017 NumVDataDwords = (DMaskLanes + 1) / 2;
8019 NumVDataDwords = DMaskLanes;
8021 AdjustRetType =
true;
8025 unsigned VAddrEnd = ArgOffset +
Intr->VAddrEnd;
8030 Op.getOperand(ArgOffset +
Intr->GradientStart).getSimpleValueType();
8032 MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
8033 IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
8035 VAddrVT =
Op.getOperand(ArgOffset +
Intr->CoordStart).getSimpleValueType();
8037 MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
8038 IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
8042 if (IsA16 && (
Op.getOperand(ArgOffset +
I).getValueType() == MVT::f16)) {
8043 assert(
I ==
Intr->BiasIndex &&
"Got unexpected 16-bit extra argument");
8048 {
Op.getOperand(ArgOffset +
I), DAG.
getUNDEF(MVT::f16)});
8052 "Bias needs to be converted to 16 bit in A16 mode");
8057 if (BaseOpcode->Gradients && !
ST->hasG16() && (IsA16 != IsG16)) {
8061 dbgs() <<
"Failed to lower image intrinsic: 16 bit addresses "
8062 "require 16 bit args for both gradients and addresses");
8067 if (!
ST->hasA16()) {
8068 LLVM_DEBUG(
dbgs() <<
"Failed to lower image intrinsic: Target does not "
8069 "support 16 bit addresses\n");
8079 if (BaseOpcode->Gradients && IsG16 &&
ST->hasG16()) {
8083 IntrOpcode = G16MappingInfo->
G16;
8091 ArgOffset +
Intr->GradientStart,
8092 ArgOffset +
Intr->CoordStart,
Intr->NumGradients);
8094 for (
unsigned I = ArgOffset +
Intr->GradientStart;
8102 ArgOffset +
Intr->CoordStart, VAddrEnd,
8106 for (
unsigned I = ArgOffset +
Intr->CoordStart;
I < VAddrEnd;
I++)
8124 const unsigned NSAMaxSize =
ST->getNSAMaxSize(BaseOpcode->Sampler);
8125 const bool HasPartialNSAEncoding =
ST->hasPartialNSAEncoding();
8126 const bool UseNSA =
ST->hasNSAEncoding() &&
8127 VAddrs.
size() >=
ST->getNSAThreshold(MF) &&
8128 (VAddrs.
size() <= NSAMaxSize || HasPartialNSAEncoding);
8129 const bool UsePartialNSA =
8130 UseNSA && HasPartialNSAEncoding && VAddrs.
size() > NSAMaxSize;
8133 if (UsePartialNSA) {
8135 ArrayRef(VAddrs).drop_front(NSAMaxSize - 1));
8144 if (!BaseOpcode->Sampler) {
8148 Op.getConstantOperandVal(ArgOffset +
Intr->UnormIndex);
8150 Unorm = UnormConst ? True : False;
8155 SDValue TexFail =
Op.getOperand(ArgOffset +
Intr->TexFailCtrlIndex);
8156 bool IsTexFail =
false;
8157 if (!
parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
8168 NumVDataDwords += 1;
8169 AdjustRetType =
true;
8174 if (AdjustRetType) {
8176 if (DMaskLanes == 0 && !BaseOpcode->Store) {
8184 EVT NewVT = NumVDataDwords > 1 ?
8188 ResultTypes[0] = NewVT;
8189 if (ResultTypes.size() == 3) {
8193 ResultTypes.erase(&ResultTypes[1]);
8197 unsigned CPol =
Op.getConstantOperandVal(ArgOffset +
Intr->CachePolicyIndex);
8198 if (BaseOpcode->Atomic)
8205 if (BaseOpcode->Store || BaseOpcode->Atomic)
8207 if (UsePartialNSA) {
8217 if (RsrcVT != MVT::v4i32 && RsrcVT != MVT::v8i32)
8220 if (BaseOpcode->Sampler) {
8229 if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
8233 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
8241 if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
8245 if (BaseOpcode->HasD16)
8250 int NumVAddrDwords =
8256 NumVDataDwords, NumVAddrDwords);
8257 }
else if (IsGFX11Plus) {
8259 UseNSA ? AMDGPU::MIMGEncGfx11NSA
8260 : AMDGPU::MIMGEncGfx11Default,
8261 NumVDataDwords, NumVAddrDwords);
8262 }
else if (IsGFX10Plus) {
8264 UseNSA ? AMDGPU::MIMGEncGfx10NSA
8265 : AMDGPU::MIMGEncGfx10Default,
8266 NumVDataDwords, NumVAddrDwords);
8270 NumVDataDwords, NumVAddrDwords);
8273 "requested image instruction is not supported on this GPU");
8278 NumVDataDwords, NumVAddrDwords);
8281 NumVDataDwords, NumVAddrDwords);
8292 if (BaseOpcode->AtomicX2) {
8297 if (BaseOpcode->NoReturn)
8301 NumVDataDwords, IsAtomicPacked16Bit,
DL);
8319 if (!
Offset->isDivergent()) {
8364 return handleByteShortBufferLoads(DAG, VT,
DL, Ops, MMO);
8368 unsigned NumLoads = 1;
8374 if (NumElts == 8 || NumElts == 16) {
8375 NumLoads = NumElts / 4;
8383 setBufferOffsets(
Offset, DAG, &Ops[3],
8384 NumLoads > 1 ?
Align(16 * NumLoads) :
Align(4));
8387 for (
unsigned i = 0; i < NumLoads; ++i) {
8393 if (NumElts == 8 || NumElts == 16)
8440 EVT VT =
Op.getValueType();
8442 unsigned IntrinsicID =
Op.getConstantOperandVal(0);
8446 switch (IntrinsicID) {
8447 case Intrinsic::amdgcn_implicit_buffer_ptr: {
8450 return getPreloadedValue(DAG, *MFI, VT,
8453 case Intrinsic::amdgcn_dispatch_ptr:
8454 case Intrinsic::amdgcn_queue_ptr: {
8457 MF.
getFunction(),
"unsupported hsa intrinsic without hsa target",
8463 auto RegID = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
8465 return getPreloadedValue(DAG, *MFI, VT, RegID);
8467 case Intrinsic::amdgcn_implicitarg_ptr: {
8469 return getImplicitArgPtr(DAG,
DL);
8470 return getPreloadedValue(DAG, *MFI, VT,
8473 case Intrinsic::amdgcn_kernarg_segment_ptr: {
8479 return getPreloadedValue(DAG, *MFI, VT,
8482 case Intrinsic::amdgcn_dispatch_id: {
8485 case Intrinsic::amdgcn_rcp:
8487 case Intrinsic::amdgcn_rsq:
8489 case Intrinsic::amdgcn_rsq_legacy:
8493 case Intrinsic::amdgcn_rcp_legacy:
8497 case Intrinsic::amdgcn_rsq_clamp: {
8508 return DAG.
getNode(ISD::FMAXNUM,
DL, VT, Tmp,
8511 case Intrinsic::r600_read_ngroups_x:
8515 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8518 case Intrinsic::r600_read_ngroups_y:
8522 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8525 case Intrinsic::r600_read_ngroups_z:
8529 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8532 case Intrinsic::r600_read_global_size_x:
8536 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8539 case Intrinsic::r600_read_global_size_y:
8543 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8546 case Intrinsic::r600_read_global_size_z:
8550 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8553 case Intrinsic::r600_read_local_size_x:
8557 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
8559 case Intrinsic::r600_read_local_size_y:
8563 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
8565 case Intrinsic::r600_read_local_size_z:
8569 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
8571 case Intrinsic::amdgcn_workgroup_id_x:
8572 return getPreloadedValue(DAG, *MFI, VT,
8574 case Intrinsic::amdgcn_workgroup_id_y:
8575 return getPreloadedValue(DAG, *MFI, VT,
8577 case Intrinsic::amdgcn_workgroup_id_z:
8578 return getPreloadedValue(DAG, *MFI, VT,
8580 case Intrinsic::amdgcn_wave_id:
8581 return lowerWaveID(DAG,
Op);
8582 case Intrinsic::amdgcn_lds_kernel_id: {
8584 return getLDSKernelId(DAG,
DL);
8585 return getPreloadedValue(DAG, *MFI, VT,
8588 case Intrinsic::amdgcn_workitem_id_x:
8589 return lowerWorkitemID(DAG,
Op, 0, MFI->getArgInfo().WorkItemIDX);
8590 case Intrinsic::amdgcn_workitem_id_y:
8591 return lowerWorkitemID(DAG,
Op, 1, MFI->getArgInfo().WorkItemIDY);
8592 case Intrinsic::amdgcn_workitem_id_z:
8593 return lowerWorkitemID(DAG,
Op, 2, MFI->getArgInfo().WorkItemIDZ);
8594 case Intrinsic::amdgcn_wavefrontsize:
8597 case Intrinsic::amdgcn_s_buffer_load: {
8598 unsigned CPol =
Op.getConstantOperandVal(3);
8605 return lowerSBuffer(VT,
DL,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
8608 case Intrinsic::amdgcn_fdiv_fast:
8609 return lowerFDIV_FAST(
Op, DAG);
8610 case Intrinsic::amdgcn_sin:
8613 case Intrinsic::amdgcn_cos:
8616 case Intrinsic::amdgcn_mul_u24:
8618 case Intrinsic::amdgcn_mul_i24:
8621 case Intrinsic::amdgcn_log_clamp: {
8627 case Intrinsic::amdgcn_fract:
8630 case Intrinsic::amdgcn_class:
8632 Op.getOperand(1),
Op.getOperand(2));
8633 case Intrinsic::amdgcn_div_fmas:
8635 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
8638 case Intrinsic::amdgcn_div_fixup:
8640 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8642 case Intrinsic::amdgcn_div_scale: {
8655 SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
8658 Denominator, Numerator);
8660 case Intrinsic::amdgcn_icmp: {
8662 if (
Op.getOperand(1).getValueType() == MVT::i1 &&
8663 Op.getConstantOperandVal(2) == 0 &&
8668 case Intrinsic::amdgcn_fcmp: {
8671 case Intrinsic::amdgcn_ballot:
8673 case Intrinsic::amdgcn_fmed3:
8675 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8676 case Intrinsic::amdgcn_fdot2:
8678 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
8680 case Intrinsic::amdgcn_fmul_legacy:
8682 Op.getOperand(1),
Op.getOperand(2));
8683 case Intrinsic::amdgcn_sffbh:
8685 case Intrinsic::amdgcn_sbfe:
8687 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8688 case Intrinsic::amdgcn_ubfe:
8690 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8691 case Intrinsic::amdgcn_cvt_pkrtz:
8692 case Intrinsic::amdgcn_cvt_pknorm_i16:
8693 case Intrinsic::amdgcn_cvt_pknorm_u16:
8694 case Intrinsic::amdgcn_cvt_pk_i16:
8695 case Intrinsic::amdgcn_cvt_pk_u16: {
8697 EVT VT =
Op.getValueType();
8700 if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
8702 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
8704 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
8706 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
8712 return DAG.
getNode(Opcode,
DL, VT,
Op.getOperand(1),
Op.getOperand(2));
8715 Op.getOperand(1),
Op.getOperand(2));
8718 case Intrinsic::amdgcn_fmad_ftz:
8720 Op.getOperand(2),
Op.getOperand(3));
8722 case Intrinsic::amdgcn_if_break:
8724 Op->getOperand(1),
Op->getOperand(2)), 0);
8726 case Intrinsic::amdgcn_groupstaticsize: {
8738 case Intrinsic::amdgcn_is_shared:
8739 case Intrinsic::amdgcn_is_private: {
8741 unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ?
8743 SDValue Aperture = getSegmentAperture(AS, SL, DAG);
8751 case Intrinsic::amdgcn_perm:
8753 Op.getOperand(2),
Op.getOperand(3));
8754 case Intrinsic::amdgcn_reloc_constant: {
8764 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
8765 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
8766 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
8767 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
8768 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
8769 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
8770 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
8771 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: {
8772 if (
Op.getOperand(4).getValueType() == MVT::i32)
8778 Op.getOperand(0),
Op.getOperand(1),
Op.getOperand(2),
8779 Op.getOperand(3), IndexKeyi32);
8781 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
8782 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
8783 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: {
8784 if (
Op.getOperand(6).getValueType() == MVT::i32)
8790 {Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
8791 Op.getOperand(3), Op.getOperand(4), Op.getOperand(5),
8792 IndexKeyi32, Op.getOperand(7)});
8794 case Intrinsic::amdgcn_addrspacecast_nonnull:
8795 return lowerADDRSPACECAST(
Op, DAG);
8796 case Intrinsic::amdgcn_readlane:
8797 case Intrinsic::amdgcn_readfirstlane:
8798 case Intrinsic::amdgcn_writelane:
8799 case Intrinsic::amdgcn_permlane16:
8800 case Intrinsic::amdgcn_permlanex16:
8801 case Intrinsic::amdgcn_permlane64:
8802 case Intrinsic::amdgcn_set_inactive:
8803 case Intrinsic::amdgcn_set_inactive_chain_arg:
8808 return lowerImage(
Op, ImageDimIntr, DAG,
false);
8819 return DAG.
getRegister(AMDGPU::SGPR_NULL, MVT::i32);
8825 unsigned NewOpcode)
const {
8829 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8830 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
8848 M->getMemOperand());
8853 unsigned NewOpcode)
const {
8857 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8858 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
8876 M->getMemOperand());
8881 unsigned IntrID =
Op.getConstantOperandVal(1);
8885 case Intrinsic::amdgcn_ds_ordered_add:
8886 case Intrinsic::amdgcn_ds_ordered_swap: {
8891 unsigned IndexOperand = M->getConstantOperandVal(7);
8892 unsigned WaveRelease = M->getConstantOperandVal(8);
8893 unsigned WaveDone = M->getConstantOperandVal(9);
8895 unsigned OrderedCountIndex = IndexOperand & 0x3f;
8896 IndexOperand &= ~0x3f;
8897 unsigned CountDw = 0;
8900 CountDw = (IndexOperand >> 24) & 0xf;
8901 IndexOperand &= ~(0xf << 24);
8903 if (CountDw < 1 || CountDw > 4) {
8905 "ds_ordered_count: dword count must be between 1 and 4");
8912 if (WaveDone && !WaveRelease)
8915 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
8916 unsigned ShaderType =
8918 unsigned Offset0 = OrderedCountIndex << 2;
8919 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (
Instruction << 4);
8922 Offset1 |= (CountDw - 1) << 6;
8925 Offset1 |= ShaderType << 2;
8927 unsigned Offset = Offset0 | (Offset1 << 8);
8936 M->getVTList(), Ops, M->getMemoryVT(),
8937 M->getMemOperand());
8939 case Intrinsic::amdgcn_raw_buffer_load:
8940 case Intrinsic::amdgcn_raw_ptr_buffer_load:
8941 case Intrinsic::amdgcn_raw_atomic_buffer_load:
8942 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
8943 case Intrinsic::amdgcn_raw_buffer_load_format:
8944 case Intrinsic::amdgcn_raw_ptr_buffer_load_format: {
8945 const bool IsFormat =
8946 IntrID == Intrinsic::amdgcn_raw_buffer_load_format ||
8947 IntrID == Intrinsic::amdgcn_raw_ptr_buffer_load_format;
8949 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8950 auto Offsets = splitBufferOffsets(
Op.getOperand(3), DAG);
8964 return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
8966 case Intrinsic::amdgcn_struct_buffer_load:
8967 case Intrinsic::amdgcn_struct_ptr_buffer_load:
8968 case Intrinsic::amdgcn_struct_buffer_load_format:
8969 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
8970 case Intrinsic::amdgcn_struct_atomic_buffer_load:
8971 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
8972 const bool IsFormat =
8973 IntrID == Intrinsic::amdgcn_struct_buffer_load_format ||
8974 IntrID == Intrinsic::amdgcn_struct_ptr_buffer_load_format;
8976 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8977 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
8992 case Intrinsic::amdgcn_raw_tbuffer_load:
8993 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
8995 EVT LoadVT =
Op.getValueType();
8996 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8997 auto Offsets = splitBufferOffsets(
Op.getOperand(3), DAG);
9016 Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
9019 case Intrinsic::amdgcn_struct_tbuffer_load:
9020 case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
9022 EVT LoadVT =
Op.getValueType();
9023 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
9024 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
9043 Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
9046 case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
9047 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
9049 case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
9050 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
9052 case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
9053 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
9055 case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
9056 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin:
9058 case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
9059 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax:
9061 case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
9062 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
9064 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
9065 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
9067 case Intrinsic::amdgcn_raw_buffer_atomic_add:
9068 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
9070 case Intrinsic::amdgcn_raw_buffer_atomic_sub:
9071 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
9073 case Intrinsic::amdgcn_raw_buffer_atomic_smin:
9074 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
9076 case Intrinsic::amdgcn_raw_buffer_atomic_umin:
9077 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
9079 case Intrinsic::amdgcn_raw_buffer_atomic_smax:
9080 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
9082 case Intrinsic::amdgcn_raw_buffer_atomic_umax:
9083 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
9085 case Intrinsic::amdgcn_raw_buffer_atomic_and:
9086 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
9088 case Intrinsic::amdgcn_raw_buffer_atomic_or:
9089 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
9091 case Intrinsic::amdgcn_raw_buffer_atomic_xor:
9092 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
9094 case Intrinsic::amdgcn_raw_buffer_atomic_inc:
9095 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_inc:
9097 case Intrinsic::amdgcn_raw_buffer_atomic_dec:
9098 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_dec:
9100 case Intrinsic::amdgcn_raw_buffer_atomic_cond_sub_u32:
9101 return lowerRawBufferAtomicIntrin(
Op, DAG,
9103 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
9104 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_swap:
9105 return lowerStructBufferAtomicIntrin(
Op, DAG,
9107 case Intrinsic::amdgcn_struct_buffer_atomic_add:
9108 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
9110 case Intrinsic::amdgcn_struct_buffer_atomic_sub:
9111 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
9113 case Intrinsic::amdgcn_struct_buffer_atomic_smin:
9114 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
9115 return lowerStructBufferAtomicIntrin(
Op, DAG,
9117 case Intrinsic::amdgcn_struct_buffer_atomic_umin:
9118 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
9119 return lowerStructBufferAtomicIntrin(
Op, DAG,
9121 case Intrinsic::amdgcn_struct_buffer_atomic_smax:
9122 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
9123 return lowerStructBufferAtomicIntrin(
Op, DAG,
9125 case Intrinsic::amdgcn_struct_buffer_atomic_umax:
9126 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
9127 return lowerStructBufferAtomicIntrin(
Op, DAG,
9129 case Intrinsic::amdgcn_struct_buffer_atomic_and:
9130 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
9132 case Intrinsic::amdgcn_struct_buffer_atomic_or:
9133 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
9135 case Intrinsic::amdgcn_struct_buffer_atomic_xor:
9136 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
9138 case Intrinsic::amdgcn_struct_buffer_atomic_inc:
9139 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_inc:
9141 case Intrinsic::amdgcn_struct_buffer_atomic_dec:
9142 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_dec:
9144 case Intrinsic::amdgcn_struct_buffer_atomic_cond_sub_u32:
9145 return lowerStructBufferAtomicIntrin(
Op, DAG,
9148 case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
9149 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap: {
9150 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(4), DAG);
9151 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
9165 EVT VT =
Op.getValueType();
9169 Op->getVTList(), Ops, VT, M->getMemOperand());
9171 case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
9172 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap: {
9173 SDValue Rsrc = bufferRsrcPtrToVector(
Op->getOperand(4), DAG);
9174 auto Offsets = splitBufferOffsets(
Op.getOperand(6), DAG);
9188 EVT VT =
Op.getValueType();
9192 Op->getVTList(), Ops, VT, M->getMemOperand());
9194 case Intrinsic::amdgcn_image_bvh_intersect_ray: {
9203 assert(NodePtr.getValueType() == MVT::i32 ||
9204 NodePtr.getValueType() == MVT::i64);
9217 const bool Is64 = NodePtr.getValueType() == MVT::i64;
9218 const unsigned NumVDataDwords = 4;
9219 const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
9220 const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
9224 const unsigned BaseOpcodes[2][2] = {
9225 {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
9226 {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
9227 AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
9231 IsGFX12Plus ? AMDGPU::MIMGEncGfx12
9232 : IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
9233 : AMDGPU::MIMGEncGfx10NSA,
9234 NumVDataDwords, NumVAddrDwords);
9238 IsGFX11 ? AMDGPU::MIMGEncGfx11Default
9239 : AMDGPU::MIMGEncGfx10Default,
9240 NumVDataDwords, NumVAddrDwords);
9246 auto packLanes = [&DAG, &Ops, &
DL] (
SDValue Op,
bool IsAligned) {
9249 if (Lanes[0].getValueSizeInBits() == 32) {
9250 for (
unsigned I = 0;
I < 3; ++
I)
9257 { Lanes[0], Lanes[1] })));
9264 { Elt0, Lanes[0] })));
9268 { Lanes[1], Lanes[2] })));
9273 if (UseNSA && IsGFX11Plus) {
9281 for (
unsigned I = 0;
I < 3; ++
I) {
9284 {DirLanes[I], InvDirLanes[I]})));
9299 packLanes(RayOrigin,
true);
9300 packLanes(RayDir,
true);
9301 packLanes(RayInvDir,
false);
9306 if (NumVAddrDwords > 12) {
9326 case Intrinsic::amdgcn_global_atomic_fmin:
9327 case Intrinsic::amdgcn_global_atomic_fmax:
9328 case Intrinsic::amdgcn_global_atomic_fmin_num:
9329 case Intrinsic::amdgcn_global_atomic_fmax_num:
9330 case Intrinsic::amdgcn_flat_atomic_fmin:
9331 case Intrinsic::amdgcn_flat_atomic_fmax:
9332 case Intrinsic::amdgcn_flat_atomic_fmin_num:
9333 case Intrinsic::amdgcn_flat_atomic_fmax_num: {
9340 unsigned Opcode = 0;
9342 case Intrinsic::amdgcn_global_atomic_fmin:
9343 case Intrinsic::amdgcn_global_atomic_fmin_num:
9344 case Intrinsic::amdgcn_flat_atomic_fmin:
9345 case Intrinsic::amdgcn_flat_atomic_fmin_num: {
9346 Opcode = ISD::ATOMIC_LOAD_FMIN;
9349 case Intrinsic::amdgcn_global_atomic_fmax:
9350 case Intrinsic::amdgcn_global_atomic_fmax_num:
9351 case Intrinsic::amdgcn_flat_atomic_fmax:
9352 case Intrinsic::amdgcn_flat_atomic_fmax_num: {
9353 Opcode = ISD::ATOMIC_LOAD_FMAX;
9360 Ops, M->getMemOperand());
9362 case Intrinsic::amdgcn_s_get_barrier_state: {
9366 bool IsInlinableBarID =
false;
9374 if (IsInlinableBarID) {
9375 Opc = AMDGPU::S_GET_BARRIER_STATE_IMM;
9380 Opc = AMDGPU::S_GET_BARRIER_STATE_M0;
9392 return lowerImage(
Op, ImageDimIntr, DAG,
true);
9400SDValue SITargetLowering::getMemIntrinsicNode(
unsigned Opcode,
const SDLoc &
DL,
9410 bool IsTFE = VTList.
NumVTs == 3;
9413 unsigned NumOpDWords = NumValueDWords + 1;
9418 SDValue Op = getMemIntrinsicNode(Opcode,
DL, OpDWordsVTList, Ops,
9419 OpDWordsVT, OpDWordsMMO, DAG);
9434 (VT == MVT::v3i32 || VT == MVT::v3f32)) {
9440 WidenedMemVT, WidenedMMO);
9450 bool ImageStore)
const {
9485 for (
unsigned I = 0;
I < Elts.
size() / 2;
I += 1) {
9491 if ((NumElements % 2) == 1) {
9493 unsigned I = Elts.
size() / 2;
9509 if (NumElements == 3) {
9519 return DAG.
getNode(ISD::BITCAST,
DL, WidenedStoreVT, ZExt);
9530 unsigned IntrinsicID =
Op.getConstantOperandVal(1);
9533 switch (IntrinsicID) {
9534 case Intrinsic::amdgcn_exp_compr: {
9538 "intrinsic not supported on subtarget",
DL.getDebugLoc());
9551 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, Src0),
9552 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, Src1),
9561 unsigned Opc =
Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
9564 case Intrinsic::amdgcn_s_barrier: {
9567 unsigned WGSize =
ST.getFlatWorkGroupSizes(MF.
getFunction()).second;
9568 if (WGSize <=
ST.getWavefrontSize())
9570 Op.getOperand(0)), 0);
9574 if (
ST.hasSplitBarriers()) {
9579 MVT::Other, K,
Op.getOperand(0)),
9591 case Intrinsic::amdgcn_struct_tbuffer_store:
9592 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
9594 bool IsD16 = (VData.
getValueType().getScalarType() == MVT::f16);
9596 VData = handleD16VData(VData, DAG);
9597 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9598 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
9616 M->getMemoryVT(), M->getMemOperand());
9619 case Intrinsic::amdgcn_raw_tbuffer_store:
9620 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
9622 bool IsD16 = (VData.
getValueType().getScalarType() == MVT::f16);
9624 VData = handleD16VData(VData, DAG);
9625 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9626 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
9644 M->getMemoryVT(), M->getMemOperand());
9647 case Intrinsic::amdgcn_raw_buffer_store:
9648 case Intrinsic::amdgcn_raw_ptr_buffer_store:
9649 case Intrinsic::amdgcn_raw_buffer_store_format:
9650 case Intrinsic::amdgcn_raw_ptr_buffer_store_format: {
9651 const bool IsFormat =
9652 IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format ||
9653 IntrinsicID == Intrinsic::amdgcn_raw_ptr_buffer_store_format;
9660 VData = handleD16VData(VData, DAG);
9670 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9671 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
9691 return handleByteShortBufferStores(DAG, VDataVT,
DL, Ops, M);
9694 M->getMemoryVT(), M->getMemOperand());
9697 case Intrinsic::amdgcn_struct_buffer_store:
9698 case Intrinsic::amdgcn_struct_ptr_buffer_store:
9699 case Intrinsic::amdgcn_struct_buffer_store_format:
9700 case Intrinsic::amdgcn_struct_ptr_buffer_store_format: {
9701 const bool IsFormat =
9702 IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format ||
9703 IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_store_format;
9711 VData = handleD16VData(VData, DAG);
9721 auto Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9722 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
9743 return handleByteShortBufferStores(DAG, VDataType,
DL, Ops, M);
9746 M->getMemoryVT(), M->getMemOperand());
9748 case Intrinsic::amdgcn_raw_buffer_load_lds:
9749 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
9750 case Intrinsic::amdgcn_struct_buffer_load_lds:
9751 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
9755 IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds ||
9756 IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_load_lds;
9757 unsigned OpOffset = HasVIndex ? 1 : 0;
9758 SDValue VOffset =
Op.getOperand(5 + OpOffset);
9760 unsigned Size =
Op->getConstantOperandVal(4);
9766 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
9767 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
9768 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
9769 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
9772 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
9773 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
9774 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
9775 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
9778 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
9779 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
9780 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
9781 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
9789 if (HasVIndex && HasVOffset)
9795 else if (HasVOffset)
9798 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
9802 unsigned Aux =
Op.getConstantOperandVal(8 + OpOffset);
9837 case Intrinsic::amdgcn_global_load_lds: {
9839 unsigned Size =
Op->getConstantOperandVal(4);
9844 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
9847 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
9850 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
9867 if (LHS->isDivergent())
9871 RHS.getOperand(0).getValueType() == MVT::i32) {
9879 if (!
Addr->isDivergent()) {
9895 LoadPtrI.
Offset =
Op->getConstantOperandVal(5);
9915 case Intrinsic::amdgcn_end_cf:
9917 Op->getOperand(2), Chain), 0);
9918 case Intrinsic::amdgcn_s_barrier_init:
9919 case Intrinsic::amdgcn_s_barrier_join:
9920 case Intrinsic::amdgcn_s_wakeup_barrier: {
9925 bool IsInlinableBarID =
false;
9933 if (IsInlinableBarID) {
9934 switch (IntrinsicID) {
9937 case Intrinsic::amdgcn_s_barrier_init:
9938 Opc = AMDGPU::S_BARRIER_INIT_IMM;
9940 case Intrinsic::amdgcn_s_barrier_join:
9941 Opc = AMDGPU::S_BARRIER_JOIN_IMM;
9943 case Intrinsic::amdgcn_s_wakeup_barrier:
9944 Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
9951 switch (IntrinsicID) {
9954 case Intrinsic::amdgcn_s_barrier_init:
9955 Opc = AMDGPU::S_BARRIER_INIT_M0;
9957 case Intrinsic::amdgcn_s_barrier_join:
9958 Opc = AMDGPU::S_BARRIER_JOIN_M0;
9960 case Intrinsic::amdgcn_s_wakeup_barrier:
9961 Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
9966 if (IntrinsicID == Intrinsic::amdgcn_s_barrier_init) {
9972 if (!IsInlinableBarID) {
9977 Op.getOperand(2), M0Val),
9981 }
else if (IsInlinableBarID) {
9990 case Intrinsic::amdgcn_s_prefetch_data: {
9993 return Op.getOperand(0);
9996 case Intrinsic::amdgcn_s_buffer_prefetch_data: {
9998 Chain, bufferRsrcPtrToVector(
Op.getOperand(2), DAG),
10005 Op->getVTList(), Ops, M->getMemoryVT(),
10006 M->getMemOperand());
10011 return lowerImage(
Op, ImageDimIntr, DAG,
true);
10024std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
10048 unsigned Overflow = ImmOffset & ~MaxImm;
10049 ImmOffset -= Overflow;
10050 if ((int32_t)Overflow < 0) {
10051 Overflow += ImmOffset;
10056 auto OverflowVal = DAG.
getConstant(Overflow,
DL, MVT::i32);
10060 SDValue Ops[] = { N0, OverflowVal };
10075void SITargetLowering::setBufferOffsets(
SDValue CombinedOffset,
10077 Align Alignment)
const {
10083 if (
TII->splitMUBUFOffset(Imm, SOffset, ImmOffset, Alignment)) {
10096 TII->splitMUBUFOffset(
Offset, SOffset, ImmOffset, Alignment)) {
10113SDValue SITargetLowering::bufferRsrcPtrToVector(
SDValue MaybePointer,
10116 return MaybePointer;
10130 SDValue NumRecords =
Op->getOperand(3);
10133 auto [LowHalf, HighHalf] = DAG.
SplitScalar(Pointer, Loc, MVT::i32, MVT::i32);
10136 std::optional<uint32_t> ConstStride = std::nullopt;
10138 ConstStride = ConstNode->getZExtValue();
10141 if (!ConstStride || *ConstStride != 0) {
10144 ShiftedStride = DAG.
getConstant(*ConstStride << 16, Loc, MVT::i32);
10155 NewHighHalf, NumRecords, Flags);
10156 SDValue RsrcPtr = DAG.
getNode(ISD::BITCAST, Loc, MVT::i128, Rsrc);
10165 bool IsTFE)
const {
10175 SDValue Op = getMemIntrinsicNode(Opc,
DL, VTs, Ops, MVT::v2i32, OpMMO, DAG);
10192 LoadVal = DAG.
getNode(ISD::BITCAST,
DL, LoadVT, LoadVal);
10202 if (VDataType == MVT::f16 || VDataType == MVT::bf16)
10203 Ops[1] = DAG.
getNode(ISD::BITCAST,
DL, MVT::i16, Ops[1]);
10206 Ops[1] = BufferStoreExt;
10211 M->getMemOperand());
10236SDValue SITargetLowering::widenLoad(
LoadSDNode *Ld, DAGCombinerInfo &DCI)
const {
10252 if ((MemVT.
isSimple() && !DCI.isAfterLegalizeDAG()) ||
10259 "unexpected vector extload");
10272 "unexpected fp extload");
10290 DCI.AddToWorklist(Cvt.
getNode());
10295 DCI.AddToWorklist(Cvt.
getNode());
10298 Cvt = DAG.
getNode(ISD::BITCAST, SL, VT, Cvt);
10306 if (Info.isEntryFunction())
10307 return Info.getUserSGPRInfo().hasFlatScratchInit();
10315 EVT MemVT =
Load->getMemoryVT();
10328 EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 :
MVT::i16;
10331 BasePtr, RealMemVT, MMO);
10361 assert(
Op.getValueType().getVectorElementType() == MVT::i32 &&
10362 "Custom lowering for non-i32 vectors hasn't been implemented.");
10365 unsigned AS =
Load->getAddressSpace();
10388 Alignment >=
Align(4) && NumElements < 32) {
10403 if (NumElements > 4)
10423 if (NumElements > 2)
10428 if (NumElements > 4)
10440 auto Flags =
Load->getMemOperand()->getFlags();
10442 Load->getAlign(), Flags, &
Fast) &&
10451 MemVT, *
Load->getMemOperand())) {
10461 EVT VT =
Op.getValueType();
10488 return DAG.
getNode(ISD::BITCAST,
DL, VT, Res);
10498 EVT VT =
Op.getValueType();
10501 bool AllowInaccurateRcp = Flags.hasApproximateFuncs() ||
10508 if (!AllowInaccurateRcp && VT != MVT::f16)
10511 if (CLHS->isExactlyValue(1.0)) {
10528 if (CLHS->isExactlyValue(-1.0)) {
10537 if (!AllowInaccurateRcp && (VT != MVT::f16 || !Flags.hasAllowReciprocal()))
10551 EVT VT =
Op.getValueType();
10554 bool AllowInaccurateDiv = Flags.hasApproximateFuncs() ||
10556 if (!AllowInaccurateDiv)
10577 return DAG.
getNode(Opcode, SL, VT,
A,
B, Flags);
10590 return DAG.
getNode(Opcode, SL, VTList,
10599 return DAG.
getNode(Opcode, SL, VT, {
A,
B,
C}, Flags);
10612 return DAG.
getNode(Opcode, SL, VTList,
10618 if (
SDValue FastLowered = lowerFastUnsafeFDIV(
Op, DAG))
10619 return FastLowered;
10625 SDValue CvtSrc0 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
10626 SDValue CvtSrc1 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
10646 const APFloat K0Val(0x1p+96f);
10649 const APFloat K1Val(0x1p-32f);
10676 assert(ST->hasDenormModeInst() &&
"Requires S_DENORM_MODE");
10677 uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue();
10678 uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2);
10683 if (
SDValue FastLowered = lowerFastUnsafeFDIV(
Op, DAG))
10684 return FastLowered;
10691 Flags.setNoFPExcept(
true);
10702 {
RHS,
RHS, LHS}, Flags);
10704 {
LHS,
RHS, LHS}, Flags);
10708 DenominatorScaled, Flags);
10710 DenominatorScaled, Flags);
10712 using namespace AMDGPU::Hwreg;
10713 const unsigned Denorm32Reg = HwregEncoding::encode(ID_MODE, 4, 2);
10718 const DenormalMode DenormMode = Info->getMode().FP32Denormals;
10721 const bool HasDynamicDenormals =
10727 if (!PreservesDenormals) {
10735 if (HasDynamicDenormals) {
10739 SavedDenormMode =
SDValue(GetReg, 0);
10747 const SDValue EnableDenormValue =
10756 EnableDenorm = DAG.
getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
10757 {EnableDenormValue,
BitField, Glue});
10770 ApproxRcp, One, NegDivScale0, Flags);
10773 ApproxRcp, Fma0, Flags);
10776 Fma1, Fma1, Flags);
10779 NumeratorScaled,
Mul, Flags);
10782 Fma2, Fma1,
Mul, Fma2, Flags);
10785 NumeratorScaled, Fma3, Flags);
10787 if (!PreservesDenormals) {
10794 Fma4.
getValue(1), DisableDenormValue,
10797 assert(HasDynamicDenormals == (
bool)SavedDenormMode);
10798 const SDValue DisableDenormValue =
10799 HasDynamicDenormals
10804 AMDGPU::S_SETREG_B32, SL, MVT::Other,
10815 {Fma4, Fma1, Fma3, Scale}, Flags);
10821 if (
SDValue FastLowered = lowerFastUnsafeFDIV64(
Op, DAG))
10822 return FastLowered;
10834 SDValue NegDivScale0 = DAG.
getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
10850 NegDivScale0,
Mul, DivScale1);
10863 SDValue Scale0BC = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
10864 SDValue Scale1BC = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
10882 Fma4, Fma3,
Mul, Scale);
10888 EVT VT =
Op.getValueType();
10890 if (VT == MVT::f32)
10891 return LowerFDIV32(
Op, DAG);
10893 if (VT == MVT::f64)
10894 return LowerFDIV64(
Op, DAG);
10896 if (VT == MVT::f16)
10897 return LowerFDIV16(
Op, DAG);
10906 EVT ResultExpVT =
Op->getValueType(1);
10907 EVT InstrExpVT = VT == MVT::f16 ? MVT::i16 : MVT::i32;
10937 if (VT == MVT::i1) {
10940 Store->getBasePtr(), MVT::i1,
Store->getMemOperand());
10944 Store->getValue().getValueType().getScalarType() == MVT::i32);
10946 unsigned AS =
Store->getAddressSpace();
10965 if (NumElements > 4)
10972 VT, *
Store->getMemOperand()))
10982 if (NumElements > 2)
10986 if (NumElements > 4 ||
10995 auto Flags =
Store->getMemOperand()->getFlags();
11017 DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32,
Op.getOperand(0), Flags);
11030 MVT VT =
Op.getValueType().getSimpleVT();
11059 SDValue SqrtSNextDown = DAG.
getNode(ISD::BITCAST,
DL, VT, SqrtSNextDownInt);
11062 DAG.
getNode(ISD::FNEG,
DL, VT, SqrtSNextDown, Flags);
11071 SDValue NegSqrtSNextUp = DAG.
getNode(ISD::FNEG,
DL, VT, SqrtSNextUp, Flags);
11182 SqrtRet = DAG.
getNode(ISD::FLDEXP,
DL, MVT::f64, SqrtRet, ScaleDown, Flags);
11199 EVT VT =
Op.getValueType();
11205 auto Flags =
Op->getFlags();
11216 switch (
Op.getOpcode()) {
11242 EVT VT =
Op.getValueType();
11258 DAGCombinerInfo &DCI)
const {
11259 EVT VT =
N->getValueType(0);
11261 if (ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
11268 EVT SrcVT = Src.getValueType();
11274 if (DCI.isAfterLegalizeDAG() && SrcVT == MVT::i32) {
11277 DCI.AddToWorklist(Cvt.
getNode());
11280 if (ScalarVT != MVT::f32) {
11292 DAGCombinerInfo &DCI)
const {
11302 SDValue MagAsVector = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2f32, MagnitudeOp);
11352 unsigned AddrSpace,
11354 DAGCombinerInfo &DCI)
const {
11384 AM.HasBaseReg =
true;
11385 AM.BaseOffs =
Offset.getSExtValue();
11390 EVT VT =
N->getValueType(0);
11407 switch (
N->getOpcode()) {
11418 DAGCombinerInfo &DCI)
const {
11427 SDValue NewPtr = performSHLPtrCombine(
Ptr.getNode(),
N->getAddressSpace(),
11428 N->getMemoryVT(), DCI);
11432 NewOps[PtrIdx] = NewPtr;
11441 return (Opc ==
ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
11442 (Opc ==
ISD::OR && (Val == 0xffffffff || Val == 0)) ||
11451SDValue SITargetLowering::splitBinaryBitConstantOp(
11452 DAGCombinerInfo &DCI,
11474 if (V.getValueType() != MVT::i1)
11476 switch (V.getOpcode()) {
11495 if (!(
C & 0x000000ff)) ZeroByteMask |= 0x000000ff;
11496 if (!(
C & 0x0000ff00)) ZeroByteMask |= 0x0000ff00;
11497 if (!(
C & 0x00ff0000)) ZeroByteMask |= 0x00ff0000;
11498 if (!(
C & 0xff000000)) ZeroByteMask |= 0xff000000;
11499 uint32_t NonZeroByteMask = ~ZeroByteMask;
11500 if ((NonZeroByteMask &
C) != NonZeroByteMask)
11513 assert(V.getValueSizeInBits() == 32);
11515 if (V.getNumOperands() != 2)
11524 switch (V.getOpcode()) {
11529 return (0x03020100 & ConstMask) | (0x0c0c0c0c & ~ConstMask);
11534 return (0x03020100 & ~ConstMask) | ConstMask;
11541 return uint32_t((0x030201000c0c0c0cull <<
C) >> 32);
11547 return uint32_t(0x0c0c0c0c03020100ull >>
C);
11554 DAGCombinerInfo &DCI)
const {
11555 if (DCI.isBeforeLegalize())
11559 EVT VT =
N->getValueType(0);
11565 if (VT == MVT::i64 && CRHS) {
11571 if (CRHS && VT == MVT::i32) {
11581 unsigned Shift = CShift->getZExtValue();
11583 unsigned Offset = NB + Shift;
11584 if ((
Offset & (Bits - 1)) == 0) {
11587 LHS->getOperand(0),
11608 Sel = (LHS.getConstantOperandVal(2) & Sel) | (~Sel & 0x0c0c0c0c);
11623 if (
Y.getOpcode() != ISD::FABS ||
Y.getOperand(0) !=
X ||
11628 if (
X != LHS.getOperand(1))
11666 (RHS.getOperand(0) == LHS.getOperand(0) &&
11667 LHS.getOperand(0) == LHS.getOperand(1))) {
11670 Mask->getZExtValue() & ~OrdMask :
11671 Mask->getZExtValue() & OrdMask;
11679 if (VT == MVT::i32 &&
11691 if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
11692 N->isDivergent() &&
TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
11695 if (LHSMask != ~0u && RHSMask != ~0u) {
11698 if (LHSMask > RHSMask) {
11705 uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
11706 uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
11709 if (!(LHSUsedLanes & RHSUsedLanes) &&
11712 !(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
11719 for (
unsigned I = 0;
I < 32;
I += 8) {
11721 if ((LHSMask & ByteSel) == 0x0c || (RHSMask & ByteSel) == 0x0c)
11722 Mask &= (0x0c <<
I) & 0xffffffff;
11731 LHS.getOperand(0), RHS.getOperand(0),
11780static const std::optional<ByteProvider<SDValue>>
11782 unsigned Depth = 0) {
11785 return std::nullopt;
11787 if (
Op.getValueSizeInBits() < 8)
11788 return std::nullopt;
11790 if (
Op.getValueType().isVector())
11793 switch (
Op->getOpcode()) {
11805 NarrowVT = VTSign->getVT();
11808 return std::nullopt;
11811 if (SrcIndex >= NarrowByteWidth)
11812 return std::nullopt;
11820 return std::nullopt;
11822 uint64_t BitShift = ShiftOp->getZExtValue();
11824 if (BitShift % 8 != 0)
11825 return std::nullopt;
11827 SrcIndex += BitShift / 8;
11845static const std::optional<ByteProvider<SDValue>>
11847 unsigned StartingIndex = 0) {
11851 return std::nullopt;
11853 unsigned BitWidth =
Op.getScalarValueSizeInBits();
11855 return std::nullopt;
11857 return std::nullopt;
11859 bool IsVec =
Op.getValueType().isVector();
11860 switch (
Op.getOpcode()) {
11863 return std::nullopt;
11868 return std::nullopt;
11872 return std::nullopt;
11875 if (!LHS->isConstantZero() && !RHS->isConstantZero())
11876 return std::nullopt;
11877 if (!LHS || LHS->isConstantZero())
11879 if (!RHS || RHS->isConstantZero())
11881 return std::nullopt;
11886 return std::nullopt;
11890 return std::nullopt;
11892 uint32_t BitMask = BitMaskOp->getZExtValue();
11896 if ((IndexMask & BitMask) != IndexMask) {
11899 if (IndexMask & BitMask)
11900 return std::nullopt;
11909 return std::nullopt;
11913 if (!ShiftOp ||
Op.getValueType().isVector())
11914 return std::nullopt;
11916 uint64_t BitsProvided =
Op.getValueSizeInBits();
11917 if (BitsProvided % 8 != 0)
11918 return std::nullopt;
11920 uint64_t BitShift = ShiftOp->getAPIntValue().urem(BitsProvided);
11922 return std::nullopt;
11924 uint64_t ConcatSizeInBytes = BitsProvided / 4;
11925 uint64_t ByteShift = BitShift / 8;
11927 uint64_t NewIndex = (
Index + ByteShift) % ConcatSizeInBytes;
11928 uint64_t BytesProvided = BitsProvided / 8;
11929 SDValue NextOp =
Op.getOperand(NewIndex >= BytesProvided ? 0 : 1);
11930 NewIndex %= BytesProvided;
11937 return std::nullopt;
11941 return std::nullopt;
11943 uint64_t BitShift = ShiftOp->getZExtValue();
11945 return std::nullopt;
11947 auto BitsProvided =
Op.getScalarValueSizeInBits();
11948 if (BitsProvided % 8 != 0)
11949 return std::nullopt;
11951 uint64_t BytesProvided = BitsProvided / 8;
11952 uint64_t ByteShift = BitShift / 8;
11957 return BytesProvided - ByteShift >
Index
11965 return std::nullopt;
11969 return std::nullopt;
11971 uint64_t BitShift = ShiftOp->getZExtValue();
11972 if (BitShift % 8 != 0)
11973 return std::nullopt;
11974 uint64_t ByteShift = BitShift / 8;
11980 return Index < ByteShift
11983 Depth + 1, StartingIndex);
11992 return std::nullopt;
12000 NarrowBitWidth = VTSign->getVT().getSizeInBits();
12002 if (NarrowBitWidth % 8 != 0)
12003 return std::nullopt;
12004 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
12006 if (
Index >= NarrowByteWidth)
12008 ? std::optional<ByteProvider<SDValue>>(
12016 return std::nullopt;
12020 if (NarrowByteWidth >=
Index) {
12025 return std::nullopt;
12032 return std::nullopt;
12038 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
12039 if (NarrowBitWidth % 8 != 0)
12040 return std::nullopt;
12041 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
12046 if (
Index >= NarrowByteWidth) {
12048 ? std::optional<ByteProvider<SDValue>>(
12053 if (NarrowByteWidth >
Index) {
12057 return std::nullopt;
12062 return std::nullopt;
12065 Depth + 1, StartingIndex);
12071 return std::nullopt;
12072 auto VecIdx = IdxOp->getZExtValue();
12073 auto ScalarSize =
Op.getScalarValueSizeInBits();
12074 if (ScalarSize < 32)
12075 Index = ScalarSize == 8 ? VecIdx : VecIdx * 2 +
Index;
12077 StartingIndex,
Index);
12082 return std::nullopt;
12086 return std::nullopt;
12089 (PermMask->getZExtValue() & (0xFF << (
Index * 8))) >> (
Index * 8);
12090 if (IdxMask > 0x07 && IdxMask != 0x0c)
12091 return std::nullopt;
12093 auto NextOp =
Op.getOperand(IdxMask > 0x03 ? 0 : 1);
12094 auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
12096 return IdxMask != 0x0c ?
calculateSrcByte(NextOp, StartingIndex, NextIndex)
12102 return std::nullopt;
12117 return !OpVT.
isVector() && OpVT.getSizeInBits() == 16;
12124 auto MemVT = L->getMemoryVT();
12127 return L->getMemoryVT().getSizeInBits() == 16;
12137 int Low8 = Mask & 0xff;
12138 int Hi8 = (Mask & 0xff00) >> 8;
12140 assert(Low8 < 8 && Hi8 < 8);
12142 bool IsConsecutive = (Hi8 - Low8 == 1);
12147 bool Is16Aligned = !(Low8 % 2);
12149 return IsConsecutive && Is16Aligned;
12157 int Low16 = PermMask & 0xffff;
12158 int Hi16 = (PermMask & 0xffff0000) >> 16;
12168 auto OtherOpIs16Bit = TempOtherOp.getValueSizeInBits() == 16 ||
12170 if (!OtherOpIs16Bit)
12178 unsigned DWordOffset) {
12181 auto TypeSize = Src.getValueSizeInBits().getFixedValue();
12183 assert(Src.getValueSizeInBits().isKnownMultipleOf(8));
12188 if (Src.getValueType().isVector()) {
12189 auto ScalarTySize = Src.getScalarValueSizeInBits();
12190 auto ScalarTy = Src.getValueType().getScalarType();
12191 if (ScalarTySize == 32) {
12195 if (ScalarTySize > 32) {
12198 DAG.
getConstant(DWordOffset / (ScalarTySize / 32), SL, MVT::i32));
12199 auto ShiftVal = 32 * (DWordOffset % (ScalarTySize / 32));
12206 assert(ScalarTySize < 32);
12207 auto NumElements =
TypeSize / ScalarTySize;
12208 auto Trunc32Elements = (ScalarTySize * NumElements) / 32;
12209 auto NormalizedTrunc = Trunc32Elements * 32 / ScalarTySize;
12210 auto NumElementsIn32 = 32 / ScalarTySize;
12211 auto NumAvailElements = DWordOffset < Trunc32Elements
12213 : NumElements - NormalizedTrunc;
12226 auto ShiftVal = 32 * DWordOffset;
12234 [[maybe_unused]]
EVT VT =
N->getValueType(0);
12239 for (
int i = 0; i < 4; i++) {
12241 std::optional<ByteProvider<SDValue>>
P =
12244 if (!
P ||
P->isConstantZero())
12249 if (PermNodes.
size() != 4)
12252 std::pair<unsigned, unsigned> FirstSrc(0, PermNodes[0].SrcOffset / 4);
12253 std::optional<std::pair<unsigned, unsigned>> SecondSrc;
12255 for (
size_t i = 0; i < PermNodes.
size(); i++) {
12256 auto PermOp = PermNodes[i];
12259 int SrcByteAdjust = 4;
12263 if (!PermOp.hasSameSrc(PermNodes[FirstSrc.first]) ||
12264 ((PermOp.SrcOffset / 4) != FirstSrc.second)) {
12266 if (!PermOp.hasSameSrc(PermNodes[SecondSrc->first]) ||
12267 ((PermOp.SrcOffset / 4) != SecondSrc->second))
12271 SecondSrc = {i, PermNodes[i].SrcOffset / 4};
12272 assert(!(PermNodes[SecondSrc->first].Src->getValueSizeInBits() % 8));
12275 assert((PermOp.SrcOffset % 4) + SrcByteAdjust < 8);
12277 PermMask |= ((PermOp.SrcOffset % 4) + SrcByteAdjust) << (i * 8);
12280 SDValue Op = *PermNodes[FirstSrc.first].Src;
12282 assert(
Op.getValueSizeInBits() == 32);
12286 int Low16 = PermMask & 0xffff;
12287 int Hi16 = (PermMask & 0xffff0000) >> 16;
12289 bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
12290 bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
12293 if (WellFormedLow && WellFormedHi)
12297 SDValue OtherOp = SecondSrc ? *PermNodes[SecondSrc->first].Src :
Op;
12306 assert(
Op.getValueType().isByteSized() &&
12324 DAGCombinerInfo &DCI)
const {
12329 EVT VT =
N->getValueType(0);
12330 if (VT == MVT::i1) {
12334 SDValue Src = LHS.getOperand(0);
12335 if (Src != RHS.getOperand(0))
12340 if (!CLHS || !CRHS)
12344 static const uint32_t MaxMask = 0x3ff;
12363 Sel |= LHS.getConstantOperandVal(2);
12371 if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
12372 N->isDivergent() &&
TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
12376 auto usesCombinedOperand = [](
SDNode *OrUse) {
12378 if (OrUse->getOpcode() != ISD::BITCAST ||
12379 !OrUse->getValueType(0).isVector())
12383 for (
auto VUse : OrUse->uses()) {
12384 if (!VUse->getValueType(0).isVector())
12391 if (VUse->getOpcode() == VectorwiseOp)
12397 if (!
any_of(
N->uses(), usesCombinedOperand))
12403 if (LHSMask != ~0u && RHSMask != ~0u) {
12406 if (LHSMask > RHSMask) {
12413 uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
12414 uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
12417 if (!(LHSUsedLanes & RHSUsedLanes) &&
12420 !(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
12422 LHSMask &= ~RHSUsedLanes;
12423 RHSMask &= ~LHSUsedLanes;
12425 LHSMask |= LHSUsedLanes & 0x04040404;
12431 LHS.getOperand(0), RHS.getOperand(0),
12435 if (LHSMask == ~0u || RHSMask == ~0u) {
12441 if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
12456 if (SrcVT == MVT::i32) {
12462 DCI.AddToWorklist(LowOr.
getNode());
12463 DCI.AddToWorklist(HiBits.
getNode());
12467 return DAG.
getNode(ISD::BITCAST, SL, MVT::i64, Vec);
12475 N->getOperand(0), CRHS))
12483 DAGCombinerInfo &DCI)
const {
12484 if (
SDValue RV = reassociateScalarOps(
N, DCI.DAG))
12493 EVT VT =
N->getValueType(0);
12494 if (CRHS && VT == MVT::i64) {
12502 if (LHS.getOpcode() ==
ISD::SELECT && VT == MVT::i32) {
12510 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, LHS->getOperand(1));
12512 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, LHS->getOperand(2));
12516 LHS->getOperand(0), FNegLHS, FNegRHS);
12517 return DAG.
getNode(ISD::BITCAST,
DL, VT, NewSelect);
12525 DAGCombinerInfo &DCI)
const {
12530 EVT VT =
N->getValueType(0);
12531 if (VT != MVT::i32)
12535 if (Src.getValueType() != MVT::i16)
12542SITargetLowering::performSignExtendInRegCombine(
SDNode *
N,
12543 DAGCombinerInfo &DCI)
const {
12550 VTSign->getVT() == MVT::i8) ||
12552 VTSign->getVT() == MVT::i16))) {
12554 "s_buffer_load_{u8, i8} are supported "
12555 "in GFX12 (or newer) architectures.");
12556 EVT VT = Src.getValueType();
12561 SDVTList ResList = DCI.DAG.getVTList(MVT::i32);
12568 SDValue BufferLoad = DCI.DAG.getMemIntrinsicNode(
12569 Opc,
DL, ResList, Ops, M->getMemoryVT(), M->getMemOperand());
12574 VTSign->getVT() == MVT::i8) ||
12576 VTSign->getVT() == MVT::i16)) &&
12590 SDVTList ResList = DCI.DAG.getVTList(MVT::i32,
12591 Src.getOperand(0).getValueType());
12594 SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc,
SDLoc(
N),
12596 Ops, M->getMemoryVT(),
12597 M->getMemOperand());
12598 return DCI.DAG.getMergeValues({BufferLoadSignExt,
12605 DAGCombinerInfo &DCI)
const {
12613 if (
N->getOperand(0).isUndef())
12620 DAGCombinerInfo &DCI)
const {
12621 EVT VT =
N->getValueType(0);
12636 if ((VT == MVT::f16 && N0.
getOpcode() == ISD::FSQRT) &&
12647 unsigned Opcode =
Op.getOpcode();
12652 const auto &
F = CFP->getValueAPF();
12653 if (
F.isNaN() &&
F.isSignaling())
12655 if (!
F.isDenormal())
12681 case ISD::FP_EXTEND:
12682 case ISD::FP16_TO_FP:
12683 case ISD::FP_TO_FP16:
12684 case ISD::BF16_TO_FP:
12685 case ISD::FP_TO_BF16:
12718 if (
Op.getValueType() == MVT::i32) {
12724 if (RHS->getZExtValue() == 0xffff0000) {
12734 return Op.getValueType().getScalarType() != MVT::f16;
12738 case ISD::FMINNUM_IEEE:
12739 case ISD::FMAXNUM_IEEE:
12740 case ISD::FMINIMUM:
12741 case ISD::FMAXIMUM:
12802 if (
Op.getValueType() == MVT::i16) {
12805 TruncSrc.
getOpcode() == ISD::BITCAST &&
12813 unsigned IntrinsicID =
Op.getConstantOperandVal(0);
12815 switch (IntrinsicID) {
12816 case Intrinsic::amdgcn_cvt_pkrtz:
12817 case Intrinsic::amdgcn_cubeid:
12818 case Intrinsic::amdgcn_frexp_mant:
12819 case Intrinsic::amdgcn_fdot2:
12820 case Intrinsic::amdgcn_rcp:
12821 case Intrinsic::amdgcn_rsq:
12822 case Intrinsic::amdgcn_rsq_clamp:
12823 case Intrinsic::amdgcn_rcp_legacy:
12824 case Intrinsic::amdgcn_rsq_legacy:
12825 case Intrinsic::amdgcn_trig_preop:
12826 case Intrinsic::amdgcn_log:
12827 case Intrinsic::amdgcn_exp2:
12828 case Intrinsic::amdgcn_sqrt:
12849 unsigned Opcode =
MI->getOpcode();
12851 if (Opcode == AMDGPU::G_FCANONICALIZE)
12854 std::optional<FPValueAndVReg> FCR;
12857 if (FCR->Value.isSignaling())
12859 if (!FCR->Value.isDenormal())
12870 case AMDGPU::G_FADD:
12871 case AMDGPU::G_FSUB:
12872 case AMDGPU::G_FMUL:
12873 case AMDGPU::G_FCEIL:
12874 case AMDGPU::G_FFLOOR:
12875 case AMDGPU::G_FRINT:
12876 case AMDGPU::G_FNEARBYINT:
12877 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
12878 case AMDGPU::G_INTRINSIC_TRUNC:
12879 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
12880 case AMDGPU::G_FMA:
12881 case AMDGPU::G_FMAD:
12882 case AMDGPU::G_FSQRT:
12883 case AMDGPU::G_FDIV:
12884 case AMDGPU::G_FREM:
12885 case AMDGPU::G_FPOW:
12886 case AMDGPU::G_FPEXT:
12887 case AMDGPU::G_FLOG:
12888 case AMDGPU::G_FLOG2:
12889 case AMDGPU::G_FLOG10:
12890 case AMDGPU::G_FPTRUNC:
12891 case AMDGPU::G_AMDGPU_RCP_IFLAG:
12892 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
12893 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
12894 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
12895 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
12897 case AMDGPU::G_FNEG:
12898 case AMDGPU::G_FABS:
12899 case AMDGPU::G_FCOPYSIGN:
12901 case AMDGPU::G_FMINNUM:
12902 case AMDGPU::G_FMAXNUM:
12903 case AMDGPU::G_FMINNUM_IEEE:
12904 case AMDGPU::G_FMAXNUM_IEEE:
12905 case AMDGPU::G_FMINIMUM:
12906 case AMDGPU::G_FMAXIMUM: {
12914 case AMDGPU::G_BUILD_VECTOR:
12919 case AMDGPU::G_INTRINSIC:
12920 case AMDGPU::G_INTRINSIC_CONVERGENT:
12922 case Intrinsic::amdgcn_fmul_legacy:
12923 case Intrinsic::amdgcn_fmad_ftz:
12924 case Intrinsic::amdgcn_sqrt:
12925 case Intrinsic::amdgcn_fmed3:
12926 case Intrinsic::amdgcn_sin:
12927 case Intrinsic::amdgcn_cos:
12928 case Intrinsic::amdgcn_log:
12929 case Intrinsic::amdgcn_exp2:
12930 case Intrinsic::amdgcn_log_clamp:
12931 case Intrinsic::amdgcn_rcp:
12932 case Intrinsic::amdgcn_rcp_legacy:
12933 case Intrinsic::amdgcn_rsq:
12934 case Intrinsic::amdgcn_rsq_clamp:
12935 case Intrinsic::amdgcn_rsq_legacy:
12936 case Intrinsic::amdgcn_div_scale:
12937 case Intrinsic::amdgcn_div_fmas:
12938 case Intrinsic::amdgcn_div_fixup:
12939 case Intrinsic::amdgcn_fract:
12940 case Intrinsic::amdgcn_cvt_pkrtz:
12941 case Intrinsic::amdgcn_cubeid:
12942 case Intrinsic::amdgcn_cubema:
12943 case Intrinsic::amdgcn_cubesc:
12944 case Intrinsic::amdgcn_cubetc:
12945 case Intrinsic::amdgcn_frexp_mant:
12946 case Intrinsic::amdgcn_fdot2:
12947 case Intrinsic::amdgcn_trig_preop:
12962SDValue SITargetLowering::getCanonicalConstantFP(
12965 if (
C.isDenormal()) {
12979 if (
C.isSignaling()) {
13001SDValue SITargetLowering::performFCanonicalizeCombine(
13003 DAGCombinerInfo &DCI)
const {
13006 EVT VT =
N->getValueType(0);
13015 EVT VT =
N->getValueType(0);
13016 return getCanonicalConstantFP(DAG,
SDLoc(
N), VT, CFP->getValueAPF());
13032 EVT EltVT =
Lo.getValueType();
13035 for (
unsigned I = 0;
I != 2; ++
I) {
13038 NewElts[
I] = getCanonicalConstantFP(DAG, SL, EltVT,
13039 CFP->getValueAPF());
13040 }
else if (
Op.isUndef()) {
13072 case ISD::FMAXNUM_IEEE:
13074 case ISD::FMAXIMUM:
13081 case ISD::FMINNUM_IEEE:
13083 case ISD::FMINIMUM:
13109 if (!MinK || !MaxK)
13122 if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->
hasMed3_16()))
13123 return DAG.
getNode(Med3Opc, SL, VT, Src, MaxVal, MinVal);
13165 if (Info->getMode().DX10Clamp) {
13174 if (VT == MVT::f32 || (VT == MVT::f16 && Subtarget->
hasMed3_16())) {
13202 case ISD::FMINNUM_IEEE:
13203 case ISD::FMAXNUM_IEEE:
13206 return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.
hasMin3Max3_16());
13207 case ISD::FMINIMUM:
13208 case ISD::FMAXIMUM:
13209 return (VT == MVT::f32 || VT == MVT::f16) && Subtarget.
hasIEEEMinMax3();
13214 return (VT == MVT::i32) || (VT == MVT::i16 && Subtarget.
hasMin3Max3_16());
13223 DAGCombinerInfo &DCI)
const {
13226 EVT VT =
N->getValueType(0);
13227 unsigned Opc =
N->getOpcode();
13241 N->getValueType(0),
13254 N->getValueType(0),
13264 if (
SDValue Med3 = performIntMed3ImmCombine(
13269 if (
SDValue Med3 = performIntMed3ImmCombine(
13275 if (
SDValue Med3 = performIntMed3ImmCombine(
13280 if (
SDValue Med3 = performIntMed3ImmCombine(
13286 if (((Opc == ISD::FMINNUM && Op0.
getOpcode() == ISD::FMAXNUM) ||
13287 (Opc == ISD::FMINNUM_IEEE && Op0.
getOpcode() == ISD::FMAXNUM_IEEE) ||
13290 (VT == MVT::f32 || VT == MVT::f64 ||
13294 if (
SDValue Res = performFPMed3ImmCombine(DAG,
SDLoc(
N), Op0, Op1))
13305 return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
13306 (CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
13315 DAGCombinerInfo &DCI)
const {
13316 EVT VT =
N->getValueType(0);
13339 if (Info->getMode().DX10Clamp) {
13359 DAGCombinerInfo &DCI)
const {
13363 return DCI.DAG.getUNDEF(
N->getValueType(0));
13371 bool IsDivergentIdx,
13376 unsigned VecSize = EltSize * NumElem;
13379 if (VecSize <= 64 && EltSize < 32)
13388 if (IsDivergentIdx)
13392 unsigned NumInsts = NumElem +
13393 ((EltSize + 31) / 32) * NumElem ;
13398 return NumInsts <= 16;
13403 return NumInsts <= 15;
13409 SDValue Idx =
N->getOperand(
N->getNumOperands() - 1);
13423SDValue SITargetLowering::performExtractVectorEltCombine(
13424 SDNode *
N, DAGCombinerInfo &DCI)
const {
13430 EVT ResVT =
N->getValueType(0);
13449 if (Vec.
hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
13468 case ISD::FMAXNUM_IEEE:
13469 case ISD::FMINNUM_IEEE:
13470 case ISD::FMAXIMUM:
13471 case ISD::FMINIMUM: {
13477 DCI.AddToWorklist(Elt0.
getNode());
13478 DCI.AddToWorklist(Elt1.
getNode());
13500 if (!DCI.isBeforeLegalize())
13508 VecSize > 32 && VecSize % 32 == 0 && Idx) {
13511 unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
13512 unsigned EltIdx = BitIndex / 32;
13513 unsigned LeftoverBitIdx = BitIndex % 32;
13517 DCI.AddToWorklist(Cast.
getNode());
13521 DCI.AddToWorklist(Elt.
getNode());
13524 DCI.AddToWorklist(Srl.
getNode());
13528 DCI.AddToWorklist(Trunc.
getNode());
13530 if (VecEltVT == ResVT) {
13531 return DAG.
getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
13542SITargetLowering::performInsertVectorEltCombine(
SDNode *
N,
13543 DAGCombinerInfo &DCI)
const {
13573 if (Src.getOpcode() == ISD::FP_EXTEND &&
13574 Src.getOperand(0).getValueType() == MVT::f16) {
13575 return Src.getOperand(0);
13579 APFloat Val = CFP->getValueAPF();
13580 bool LosesInfo =
true;
13590 DAGCombinerInfo &DCI)
const {
13592 "combine only useful on gfx8");
13595 EVT VT =
N->getValueType(0);
13596 if (VT != MVT::f16)
13631 return DAG.
getNode(ISD::FMINNUM_IEEE, SL, VT, B1, C1);
13634unsigned SITargetLowering::getFusedOpcode(
const SelectionDAG &DAG,
13636 const SDNode *N1)
const {
13641 if (((VT == MVT::f32 &&
13643 (VT == MVT::f16 && Subtarget->
hasMadF16() &&
13663 EVT VT =
N->getValueType(0);
13664 if (VT != MVT::i32 && VT != MVT::i64)
13670 unsigned Opc =
N->getOpcode();
13693 return DAG.
getNode(Opc, SL, VT, Add1, Op2);
13715 DAGCombinerInfo &DCI)
const {
13719 EVT VT =
N->getValueType(0);
13729 if (!
N->isDivergent() && Subtarget->
hasSMulHi())
13733 if (NumBits <= 32 || NumBits > 64)
13736 if (LHS.getOpcode() !=
ISD::MUL) {
13745 unsigned NumUsers = 0;
13770 bool MulSignedLo =
false;
13771 if (!MulLHSUnsigned32 || !MulRHSUnsigned32) {
13780 if (VT != MVT::i64) {
13803 getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
13805 if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
13807 std::tie(AccumLo, AccumHi) = DAG.
SplitScalar(Accum, SL, MVT::i32, MVT::i32);
13809 if (!MulLHSUnsigned32) {
13816 if (!MulRHSUnsigned32) {
13827 if (VT != MVT::i64)
13834static std::optional<ByteProvider<SDValue>>
13837 if (!Byte0 || Byte0->isConstantZero()) {
13838 return std::nullopt;
13841 if (Byte1 && !Byte1->isConstantZero()) {
13842 return std::nullopt;
13848 unsigned FirstCs =
First & 0x0c0c0c0c;
13849 unsigned SecondCs = Second & 0x0c0c0c0c;
13850 unsigned FirstNoCs =
First & ~0x0c0c0c0c;
13851 unsigned SecondNoCs = Second & ~0x0c0c0c0c;
13853 assert((FirstCs & 0xFF) | (SecondCs & 0xFF));
13854 assert((FirstCs & 0xFF00) | (SecondCs & 0xFF00));
13855 assert((FirstCs & 0xFF0000) | (SecondCs & 0xFF0000));
13856 assert((FirstCs & 0xFF000000) | (SecondCs & 0xFF000000));
13858 return (FirstNoCs | SecondNoCs) | (FirstCs & SecondCs);
13882 for (
int BPI = 0; BPI < 2; BPI++) {
13885 BPP = {Src1, Src0};
13887 unsigned ZeroMask = 0x0c0c0c0c;
13888 unsigned FMask = 0xFF << (8 * (3 - Step));
13890 unsigned FirstMask =
13891 (BPP.first.SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask);
13892 unsigned SecondMask =
13893 (BPP.second.SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask);
13897 int FirstGroup = -1;
13898 for (
int I = 0;
I < 2;
I++) {
13900 auto MatchesFirst = [&BPP](
DotSrc &IterElt) {
13901 return IterElt.SrcOp == *BPP.first.Src &&
13902 (IterElt.DWordOffset == (BPP.first.SrcOffset / 4));
13912 if (FirstGroup != -1) {
13914 auto MatchesSecond = [&BPP](
DotSrc &IterElt) {
13915 return IterElt.SrcOp == *BPP.second.Src &&
13916 (IterElt.DWordOffset == (BPP.second.SrcOffset / 4));
13922 Srcs.
push_back({*BPP.second.Src, SecondMask, BPP.second.SrcOffset / 4});
13930 unsigned ZeroMask = 0x0c0c0c0c;
13931 unsigned FMask = 0xFF << (8 * (3 - Step));
13935 ((Src0.
SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask)),
13939 ((Src1.
SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask)),
13950 if (Srcs.
size() == 1) {
13951 auto Elt = Srcs.
begin();
13955 if (Elt->PermMask == 0x3020100)
13962 auto FirstElt = Srcs.
begin();
13963 auto SecondElt = std::next(FirstElt);
13970 auto FirstMask = FirstElt->PermMask;
13971 auto SecondMask = SecondElt->PermMask;
13973 unsigned FirstCs = FirstMask & 0x0c0c0c0c;
13974 unsigned FirstPlusFour = FirstMask | 0x04040404;
13977 FirstMask = (FirstPlusFour & 0x0F0F0F0F) | FirstCs;
13989 FirstElt = std::next(SecondElt);
13990 if (FirstElt == Srcs.
end())
13993 SecondElt = std::next(FirstElt);
13996 if (SecondElt == Srcs.
end()) {
14002 DAG.
getConstant(FirstElt->PermMask, SL, MVT::i32)));
14008 return Perms.
size() == 2
14014 for (
auto &[EntryVal, EntryMask, EntryOffset] : Srcs) {
14015 EntryMask = EntryMask >> ((4 - ChainLength) * 8);
14016 auto ZeroMask = ChainLength == 2 ? 0x0c0c0000 : 0x0c000000;
14017 EntryMask += ZeroMask;
14022 auto Opcode =
Op.getOpcode();
14028static std::optional<bool>
14039 bool S0IsSigned = Known0.countMinLeadingOnes() > 0;
14042 bool S1IsSigned = Known1.countMinLeadingOnes() > 0;
14044 assert(!(S0IsUnsigned && S0IsSigned));
14045 assert(!(S1IsUnsigned && S1IsSigned));
14053 if ((S0IsUnsigned && S1IsUnsigned) || (S0IsSigned && S1IsSigned))
14059 if ((S0IsUnsigned && S1IsSigned) || (S0IsSigned && S1IsUnsigned))
14060 return std::nullopt;
14072 if ((S0IsSigned && !(S1IsSigned || S1IsUnsigned)) ||
14073 ((S1IsSigned && !(S0IsSigned || S0IsUnsigned))))
14078 if ((!(S1IsSigned || S1IsUnsigned) && !(S0IsSigned || S0IsUnsigned)))
14084 if ((S0IsUnsigned && !(S1IsSigned || S1IsUnsigned)) ||
14085 ((S1IsUnsigned && !(S0IsSigned || S0IsUnsigned))))
14086 return std::nullopt;
14092 DAGCombinerInfo &DCI)
const {
14094 EVT VT =
N->getValueType(0);
14101 if (
SDValue Folded = tryFoldToMad64_32(
N, DCI))
14106 if (
SDValue V = reassociateScalarOps(
N, DAG)) {
14113 std::optional<bool> IsSigned;
14119 int ChainLength = 0;
14120 for (
int I = 0;
I < 4;
I++) {
14121 auto MulIdx =
isMul(LHS) ? 0 :
isMul(RHS) ? 1 : -1;
14124 auto Src0 =
handleMulOperand(TempNode->getOperand(MulIdx)->getOperand(0));
14127 auto Src1 =
handleMulOperand(TempNode->getOperand(MulIdx)->getOperand(1));
14132 TempNode->getOperand(MulIdx), *Src0, *Src1,
14133 TempNode->getOperand(MulIdx)->getOperand(0),
14134 TempNode->getOperand(MulIdx)->getOperand(1), DAG);
14138 IsSigned = *IterIsSigned;
14139 if (*IterIsSigned != *IsSigned)
14142 auto AddIdx = 1 - MulIdx;
14145 if (
I == 2 &&
isMul(TempNode->getOperand(AddIdx))) {
14146 Src2s.
push_back(TempNode->getOperand(AddIdx));
14156 TempNode->getOperand(AddIdx), *Src0, *Src1,
14157 TempNode->getOperand(AddIdx)->getOperand(0),
14158 TempNode->getOperand(AddIdx)->getOperand(1), DAG);
14162 if (*IterIsSigned != *IsSigned)
14166 ChainLength =
I + 2;
14170 TempNode = TempNode->getOperand(AddIdx);
14172 ChainLength =
I + 1;
14173 if (TempNode->getNumOperands() < 2)
14175 LHS = TempNode->getOperand(0);
14176 RHS = TempNode->getOperand(1);
14179 if (ChainLength < 2)
14185 if (ChainLength < 4) {
14195 bool UseOriginalSrc =
false;
14196 if (ChainLength == 4 && Src0s.
size() == 1 && Src1s.
size() == 1 &&
14197 Src0s.
begin()->PermMask == Src1s.
begin()->PermMask &&
14198 Src0s.
begin()->SrcOp.getValueSizeInBits() >= 32 &&
14199 Src1s.
begin()->SrcOp.getValueSizeInBits() >= 32) {
14201 auto Src0Mask = Src0s.
begin()->PermMask;
14202 SrcBytes.
push_back(Src0Mask & 0xFF000000);
14203 bool UniqueEntries =
true;
14204 for (
auto I = 1;
I < 4;
I++) {
14205 auto NextByte = Src0Mask & (0xFF << ((3 -
I) * 8));
14208 UniqueEntries =
false;
14214 if (UniqueEntries) {
14215 UseOriginalSrc =
true;
14217 auto FirstElt = Src0s.
begin();
14221 auto SecondElt = Src1s.
begin();
14223 SecondElt->DWordOffset);
14232 if (!UseOriginalSrc) {
14239 DAG.
getExtOrTrunc(*IsSigned, Src2s[ChainLength - 1], SL, MVT::i32);
14242 : Intrinsic::amdgcn_udot4,
14252 if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
14257 unsigned Opc = LHS.getOpcode();
14262 Opc = RHS.getOpcode();
14268 auto Cond = RHS.getOperand(0);
14276 return DAG.
getNode(Opc, SL, VTList, Args);
14290 DAGCombinerInfo &DCI)
const {
14292 EVT VT =
N->getValueType(0);
14294 if (VT != MVT::i32)
14303 unsigned Opc = RHS.getOpcode();
14309 auto Cond = RHS.getOperand(0);
14317 return DAG.
getNode(Opc, SL, VTList, Args);
14331SDValue SITargetLowering::performAddCarrySubCarryCombine(
SDNode *
N,
14332 DAGCombinerInfo &DCI)
const {
14334 if (
N->getValueType(0) != MVT::i32)
14346 unsigned Opc =
N->getOpcode();
14349 SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1),
N->getOperand(2) };
14356 DAGCombinerInfo &DCI)
const {
14361 EVT VT =
N->getValueType(0);
14373 if (
A == LHS.getOperand(1)) {
14374 unsigned FusedOp = getFusedOpcode(DAG,
N, LHS.getNode());
14375 if (FusedOp != 0) {
14377 return DAG.
getNode(FusedOp, SL, VT,
A, Two, RHS);
14385 if (
A == RHS.getOperand(1)) {
14386 unsigned FusedOp = getFusedOpcode(DAG,
N, RHS.getNode());
14387 if (FusedOp != 0) {
14389 return DAG.
getNode(FusedOp, SL, VT,
A, Two, LHS);
14398 DAGCombinerInfo &DCI)
const {
14404 EVT VT =
N->getValueType(0);
14417 if (
A == LHS.getOperand(1)) {
14418 unsigned FusedOp = getFusedOpcode(DAG,
N, LHS.getNode());
14423 return DAG.
getNode(FusedOp, SL, VT,
A, Two, NegRHS);
14432 if (
A == RHS.getOperand(1)) {
14433 unsigned FusedOp = getFusedOpcode(DAG,
N, RHS.getNode());
14436 return DAG.
getNode(FusedOp, SL, VT,
A, NegTwo, LHS);
14445 DAGCombinerInfo &DCI)
const {
14448 EVT VT =
N->getValueType(0);
14462 bool IsNegative =
false;
14463 if (CLHS->isExactlyValue(1.0) ||
14464 (IsNegative = CLHS->isExactlyValue(-1.0))) {
14467 if (RHS.getOpcode() == ISD::FSQRT) {
14471 return IsNegative ? DAG.
getNode(ISD::FNEG, SL, VT, Rsq, Flags) : Rsq;
14480 DAGCombinerInfo &DCI)
const {
14482 EVT VT =
N->getValueType(0);
14504 (
N->getFlags().hasAllowContract() &&
14505 FMA->getFlags().hasAllowContract())) {
14520 if (FMAOp1.
getOpcode() != ISD::FP_EXTEND ||
14539 if (Vec1 == Vec2 || Vec3 == Vec4)
14545 if ((Vec1 == Vec3 && Vec2 == Vec4) ||
14546 (Vec1 == Vec4 && Vec2 == Vec3)) {
14555 DAGCombinerInfo &DCI)
const {
14561 EVT VT = LHS.getValueType();
14598 LHS.getConstantOperandVal(1) != LHS.getConstantOperandVal(2) &&
14605 const APInt &CT = LHS.getConstantOperandAPInt(1);
14606 const APInt &CF = LHS.getConstantOperandAPInt(2);
14618 if (VT != MVT::f32 && VT != MVT::f64 &&
14651 DAGCombinerInfo &DCI)
const {
14673 unsigned ShiftOffset = 8 *
Offset;
14675 ShiftOffset -=
C->getZExtValue();
14677 ShiftOffset +=
C->getZExtValue();
14679 if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
14681 MVT::f32, Shifted);
14692 DCI.AddToWorklist(
N);
14699 return DAG.
getNode(
N->getOpcode(), SL, MVT::f32, DemandedSrc);
14705 DAGCombinerInfo &DCI)
const {
14715 return DCI.DAG.getConstantFP(Zero,
SDLoc(
N),
N->getValueType(0));
14718 APFloat One(
F.getSemantics(),
"1.0");
14720 return DCI.DAG.getConstantFP(One,
SDLoc(
N),
N->getValueType(0));
14728 switch (
N->getOpcode()) {
14744 if (
auto Res = promoteUniformOpToI32(
SDValue(
N, 0), DCI))
14754 switch (
N->getOpcode()) {
14756 return performAddCombine(
N, DCI);
14758 return performSubCombine(
N, DCI);
14761 return performAddCarrySubCarryCombine(
N, DCI);
14763 return performFAddCombine(
N, DCI);
14765 return performFSubCombine(
N, DCI);
14767 return performFDivCombine(
N, DCI);
14769 return performSetCCCombine(
N, DCI);
14772 case ISD::FMAXNUM_IEEE:
14773 case ISD::FMINNUM_IEEE:
14774 case ISD::FMAXIMUM:
14775 case ISD::FMINIMUM:
14782 return performMinMaxCombine(
N, DCI);
14784 return performFMACombine(
N, DCI);
14786 return performAndCombine(
N, DCI);
14788 return performOrCombine(
N, DCI);
14791 if (
N->getValueType(0) == MVT::i32 &&
N->isDivergent() &&
14792 TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
14798 return performXorCombine(
N, DCI);
14800 return performZeroExtendCombine(
N, DCI);
14802 return performSignExtendInRegCombine(
N , DCI);
14804 return performClassCombine(
N, DCI);
14806 return performFCanonicalizeCombine(
N, DCI);
14808 return performRcpCombine(
N, DCI);
14823 return performUCharToFloatCombine(
N, DCI);
14825 return performFCopySignCombine(
N, DCI);
14830 return performCvtF32UByteNCombine(
N, DCI);
14832 return performFMed3Combine(
N, DCI);
14834 return performCvtPkRTZCombine(
N, DCI);
14836 return performClampCombine(
N, DCI);
14839 EVT VT =
N->getValueType(0);
14842 if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2bf16) {
14845 EVT EltVT = Src.getValueType();
14846 if (EltVT != MVT::i16)
14847 Src = DAG.
getNode(ISD::BITCAST, SL, MVT::i16, Src);
14850 return DAG.
getNode(ISD::BITCAST, SL, VT, Ext);
14856 return performExtractVectorEltCombine(
N, DCI);
14858 return performInsertVectorEltCombine(
N, DCI);
14860 return performFPRoundCombine(
N, DCI);
14869 return performMemSDNodeCombine(MemNode, DCI);
14882 default:
return ~0u;
14883 case AMDGPU::sub0:
return 0;
14884 case AMDGPU::sub1:
return 1;
14885 case AMDGPU::sub2:
return 2;
14886 case AMDGPU::sub3:
return 3;
14887 case AMDGPU::sub4:
return 4;
14894 unsigned Opcode =
Node->getMachineOpcode();
14898 if (D16Idx >= 0 &&
Node->getConstantOperandVal(D16Idx))
14904 unsigned OldDmask =
Node->getConstantOperandVal(DmaskIdx);
14905 unsigned NewDmask = 0;
14908 bool UsesTFC = ((int(TFEIdx) >= 0 &&
Node->getConstantOperandVal(TFEIdx)) ||
14909 (int(LWEIdx) >= 0 &&
Node->getConstantOperandVal(LWEIdx)))
14912 unsigned TFCLane = 0;
14913 bool HasChain =
Node->getNumValues() > 1;
14915 if (OldDmask == 0) {
14923 TFCLane = OldBitsSet;
14931 if (
I.getUse().getResNo() != 0)
14935 if (!
I->isMachineOpcode() ||
14936 I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
14948 if (UsesTFC && Lane == TFCLane) {
14953 for (
unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
14955 Dmask &= ~(1 << Comp);
14963 NewDmask |= 1 << Comp;
14968 bool NoChannels = !NewDmask;
14975 if (OldBitsSet == 1)
14981 if (NewDmask == OldDmask)
14990 unsigned NewChannels = BitsSet + UsesTFC;
14994 assert(NewOpcode != -1 &&
14995 NewOpcode !=
static_cast<int>(
Node->getMachineOpcode()) &&
14996 "failed to find equivalent MIMG op");
15004 MVT SVT =
Node->getValueType(0).getVectorElementType().getSimpleVT();
15006 MVT ResultVT = NewChannels == 1 ?
15008 NewChannels == 5 ? 8 : NewChannels);
15022 if (NewChannels == 1) {
15032 for (
unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
15037 if (i || !NoChannels)
15042 if (NewUser !=
User) {
15050 case AMDGPU::sub0: Idx = AMDGPU::sub1;
break;
15051 case AMDGPU::sub1: Idx = AMDGPU::sub2;
break;
15052 case AMDGPU::sub2: Idx = AMDGPU::sub3;
break;
15053 case AMDGPU::sub3: Idx = AMDGPU::sub4;
break;
15063 Op =
Op.getOperand(0);
15083 MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
15087 = DAG.
getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal,
15094 return ToResultReg.
getNode();
15099 for (
unsigned i = 0; i < Node->getNumOperands(); ++i) {
15107 Node->getOperand(i).getValueType(),
15108 Node->getOperand(i)), 0));
15119 unsigned Opcode = Node->getMachineOpcode();
15121 if (
TII->isImage(Opcode) && !
TII->get(Opcode).mayStore() &&
15122 !
TII->isGather4(Opcode) &&
15124 return adjustWritemask(Node, DAG);
15127 if (Opcode == AMDGPU::INSERT_SUBREG ||
15128 Opcode == AMDGPU::REG_SEQUENCE) {
15134 case AMDGPU::V_DIV_SCALE_F32_e64:
15135 case AMDGPU::V_DIV_SCALE_F64_e64: {
15145 (Src0 == Src1 || Src0 == Src2))
15202 unsigned InitIdx = 0;
15204 if (
TII->isImage(
MI)) {
15212 unsigned TFEVal = TFE ? TFE->
getImm() : 0;
15213 unsigned LWEVal = LWE ? LWE->
getImm() : 0;
15214 unsigned D16Val = D16 ? D16->getImm() : 0;
15216 if (!TFEVal && !LWEVal)
15227 assert(MO_Dmask &&
"Expected dmask operand in instruction");
15229 unsigned dmask = MO_Dmask->
getImm();
15236 InitIdx = D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
15242 TRI.getRegSizeInBits(*
TII->getOpRegClass(
MI, DstIdx)) / 32;
15243 if (DstSize < InitIdx)
15246 InitIdx =
TRI.getRegSizeInBits(*
TII->getOpRegClass(
MI, DstIdx)) / 32;
15254 Register PrevDst =
MRI.cloneVirtualRegister(
MI.getOperand(DstIdx).getReg());
15255 unsigned NewDst = 0;
15264 for (; SizeLeft; SizeLeft--, CurrIdx++) {
15265 NewDst =
MRI.createVirtualRegister(
TII->getOpRegClass(
MI, DstIdx));
15283 MI.tieOperands(DstIdx,
MI.getNumOperands() - 1);
15296 if (
TII->isVOP3(
MI.getOpcode())) {
15298 TII->legalizeOperandsVOP3(
MRI,
MI);
15303 if (!
MI.getDesc().operands().empty()) {
15304 unsigned Opc =
MI.getOpcode();
15305 bool HasAGPRs = Info->mayNeedAGPRs();
15313 if ((
I == Src2Idx) && (HasAGPRs))
15316 if (!
Op.isReg() || !
Op.getReg().isVirtual())
15318 auto *RC =
TRI->getRegClassForReg(
MRI,
Op.getReg());
15319 if (!
TRI->hasAGPRs(RC))
15321 auto *Src =
MRI.getUniqueVRegDef(
Op.getReg());
15322 if (!Src || !Src->isCopy() ||
15323 !
TRI->isSGPRReg(
MRI, Src->getOperand(1).getReg()))
15325 auto *NewRC =
TRI->getEquivalentVGPRClass(RC);
15329 MRI.setRegClass(
Op.getReg(), NewRC);
15336 if (
auto *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2)) {
15337 if (Src2->isReg() && Src2->getReg().isVirtual()) {
15338 auto *RC =
TRI->getRegClassForReg(
MRI, Src2->getReg());
15339 if (
TRI->isVectorSuperClass(RC)) {
15340 auto *NewRC =
TRI->getEquivalentAGPRClass(RC);
15341 MRI.setRegClass(Src2->getReg(), NewRC);
15342 if (Src2->isTied())
15343 MRI.setRegClass(
MI.getOperand(0).getReg(), NewRC);
15352 if (
TII->isImage(
MI))
15353 TII->enforceOperandRCAlignment(
MI, AMDGPU::OpName::vaddr);
15379 MVT::v2i32, Ops0), 0);
15409 RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
15431std::pair<unsigned, const TargetRegisterClass *>
15438 if (Constraint.
size() == 1) {
15440 switch (Constraint[0]) {
15447 RC = &AMDGPU::SReg_32RegClass;
15450 RC = &AMDGPU::SGPR_64RegClass;
15455 return std::pair(0U,
nullptr);
15462 RC = &AMDGPU::VGPR_32RegClass;
15467 return std::pair(0U,
nullptr);
15476 RC = &AMDGPU::AGPR_32RegClass;
15481 return std::pair(0U,
nullptr);
15490 return std::pair(0U, RC);
15495 if (
RegName.consume_front(
"v")) {
15496 RC = &AMDGPU::VGPR_32RegClass;
15497 }
else if (
RegName.consume_front(
"s")) {
15498 RC = &AMDGPU::SGPR_32RegClass;
15499 }
else if (
RegName.consume_front(
"a")) {
15500 RC = &AMDGPU::AGPR_32RegClass;
15505 if (
RegName.consume_front(
"[")) {
15515 RC =
TRI->getVGPRClassForBitWidth(Width);
15517 RC =
TRI->getSGPRClassForBitWidth(Width);
15519 RC =
TRI->getAGPRClassForBitWidth(Width);
15521 Reg =
TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
15522 return std::pair(Reg, RC);
15527 if (!
Failed && Idx < RC->getNumRegs())
15535 Ret.second =
TRI->getPhysRegBaseClass(Ret.first);
15541 if (Constraint.
size() == 1) {
15542 switch (Constraint[0]) {
15551 }
else if (Constraint ==
"DA" ||
15552 Constraint ==
"DB") {
15560 if (Constraint.
size() == 1) {
15561 switch (Constraint[0]) {
15584 std::vector<SDValue> &Ops,
15599 unsigned Size =
Op.getScalarValueSizeInBits();
15607 Val =
C->getSExtValue();
15611 Val =
C->getValueAPF().bitcastToAPInt().getSExtValue();
15617 if (
Op.getOperand(0).isUndef() ||
Op.getOperand(1).isUndef())
15620 Val =
C->getSExtValue();
15624 Val =
C->getValueAPF().bitcastToAPInt().getSExtValue();
15634 if (Constraint.
size() == 1) {
15635 switch (Constraint[0]) {
15650 }
else if (Constraint.
size() == 2) {
15651 if (Constraint ==
"DA") {
15652 int64_t HiBits =
static_cast<int32_t
>(Val >> 32);
15653 int64_t LoBits =
static_cast<int32_t
>(Val);
15657 if (Constraint ==
"DB") {
15665 unsigned MaxSize)
const {
15666 unsigned Size = std::min<unsigned>(
Op.getScalarValueSizeInBits(), MaxSize);
15669 MVT VT =
Op.getSimpleValueType();
15694 switch (UnalignedClassID) {
15695 case AMDGPU::VReg_64RegClassID:
15696 return AMDGPU::VReg_64_Align2RegClassID;
15697 case AMDGPU::VReg_96RegClassID:
15698 return AMDGPU::VReg_96_Align2RegClassID;
15699 case AMDGPU::VReg_128RegClassID:
15700 return AMDGPU::VReg_128_Align2RegClassID;
15701 case AMDGPU::VReg_160RegClassID:
15702 return AMDGPU::VReg_160_Align2RegClassID;
15703 case AMDGPU::VReg_192RegClassID:
15704 return AMDGPU::VReg_192_Align2RegClassID;
15705 case AMDGPU::VReg_224RegClassID:
15706 return AMDGPU::VReg_224_Align2RegClassID;
15707 case AMDGPU::VReg_256RegClassID:
15708 return AMDGPU::VReg_256_Align2RegClassID;
15709 case AMDGPU::VReg_288RegClassID:
15710 return AMDGPU::VReg_288_Align2RegClassID;
15711 case AMDGPU::VReg_320RegClassID:
15712 return AMDGPU::VReg_320_Align2RegClassID;
15713 case AMDGPU::VReg_352RegClassID:
15714 return AMDGPU::VReg_352_Align2RegClassID;
15715 case AMDGPU::VReg_384RegClassID:
15716 return AMDGPU::VReg_384_Align2RegClassID;
15717 case AMDGPU::VReg_512RegClassID:
15718 return AMDGPU::VReg_512_Align2RegClassID;
15719 case AMDGPU::VReg_1024RegClassID:
15720 return AMDGPU::VReg_1024_Align2RegClassID;
15721 case AMDGPU::AReg_64RegClassID:
15722 return AMDGPU::AReg_64_Align2RegClassID;
15723 case AMDGPU::AReg_96RegClassID:
15724 return AMDGPU::AReg_96_Align2RegClassID;
15725 case AMDGPU::AReg_128RegClassID:
15726 return AMDGPU::AReg_128_Align2RegClassID;
15727 case AMDGPU::AReg_160RegClassID:
15728 return AMDGPU::AReg_160_Align2RegClassID;
15729 case AMDGPU::AReg_192RegClassID:
15730 return AMDGPU::AReg_192_Align2RegClassID;
15731 case AMDGPU::AReg_256RegClassID:
15732 return AMDGPU::AReg_256_Align2RegClassID;
15733 case AMDGPU::AReg_512RegClassID:
15734 return AMDGPU::AReg_512_Align2RegClassID;
15735 case AMDGPU::AReg_1024RegClassID:
15736 return AMDGPU::AReg_1024_Align2RegClassID;
15752 if (Info->isEntryFunction()) {
15759 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
15761 ? AMDGPU::SGPR_32RegClass.getRegister(MaxNumSGPRs - 1)
15762 :
TRI->getAlignedHighSGPRForRC(MF, 2,
15763 &AMDGPU::SGPR_64RegClass);
15764 Info->setSGPRForEXECCopy(SReg);
15766 assert(!
TRI->isSubRegister(Info->getScratchRSrcReg(),
15767 Info->getStackPtrOffsetReg()));
15768 if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
15769 MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
15773 if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
15774 MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
15776 if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
15777 MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
15779 Info->limitOccupancy(MF);
15781 if (ST.isWave32() && !MF.
empty()) {
15782 for (
auto &
MBB : MF) {
15783 for (
auto &
MI :
MBB) {
15784 TII->fixImplicitOperands(
MI);
15794 if (ST.needsAlignedVGPRs()) {
15795 for (
unsigned I = 0, E =
MRI.getNumVirtRegs();
I != E; ++
I) {
15801 if (NewClassID != -1)
15802 MRI.setRegClass(Reg,
TRI->getRegClass(NewClassID));
15811 const APInt &DemandedElts,
15813 unsigned Depth)
const {
15815 unsigned Opc =
Op.getOpcode();
15818 unsigned IID =
Op.getConstantOperandVal(0);
15820 case Intrinsic::amdgcn_mbcnt_lo:
15821 case Intrinsic::amdgcn_mbcnt_hi: {
15827 IID == Intrinsic::amdgcn_mbcnt_lo ? ST.getWavefrontSizeLog2() : 5);
15837 Op, Known, DemandedElts, DAG,
Depth);
15852 unsigned MaxValue =
15861 switch (
MI->getOpcode()) {
15862 case AMDGPU::G_INTRINSIC:
15863 case AMDGPU::G_INTRINSIC_CONVERGENT: {
15866 case Intrinsic::amdgcn_workitem_id_x:
15869 case Intrinsic::amdgcn_workitem_id_y:
15872 case Intrinsic::amdgcn_workitem_id_z:
15875 case Intrinsic::amdgcn_mbcnt_lo:
15876 case Intrinsic::amdgcn_mbcnt_hi: {
15888 case Intrinsic::amdgcn_groupstaticsize: {
15899 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
15902 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
15905 case AMDGPU::G_AMDGPU_SMED3:
15906 case AMDGPU::G_AMDGPU_UMED3: {
15907 auto [Dst, Src0, Src1, Src2] =
MI->getFirst4Regs();
15934 unsigned Depth)
const {
15942 if (
MaybeAlign RetAlign = Attrs.getRetAlignment())
15969 if (Header->getAlignment() != PrefAlign)
15972 unsigned LoopSize = 0;
15980 LoopSize +=
TII->getInstSizeInBytes(
MI);
15981 if (LoopSize > 192)
15986 if (LoopSize <= 64)
15989 if (LoopSize <= 128)
15990 return CacheLineAlign;
15996 auto I = Exit->getFirstNonDebugInstr();
15997 if (
I != Exit->end() &&
I->getOpcode() == AMDGPU::S_INST_PREFETCH)
15998 return CacheLineAlign;
16007 if (PreTerm == Pre->
begin() ||
16008 std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
16012 auto ExitHead = Exit->getFirstNonDebugInstr();
16013 if (ExitHead == Exit->end() ||
16014 ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
16019 return CacheLineAlign;
16027 N =
N->getOperand(0).getNode();
16028 if (
N->getOpcode() == ISD::INLINEASM ||
16029 N->getOpcode() == ISD::INLINEASM_BR)
16038 switch (
N->getOpcode()) {
16046 if (Reg.isPhysical() ||
MRI.isLiveIn(Reg))
16047 return !
TRI->isSGPRReg(
MRI, Reg);
16049 if (
const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
16053 return !
TRI->isSGPRReg(
MRI, Reg);
16057 unsigned AS = L->getAddressSpace();
16061 case ISD::CALLSEQ_END:
16090 return A->readMem() &&
A->writeMem();
16111 switch (Ty.getScalarSizeInBits()) {
16125 unsigned Depth)
const {
16130 if (Info->getMode().DX10Clamp)
16142 if (RMW->
hasMetadata(
"amdgpu.ignore.denormal.mode"))
16162 <<
"Hardware instruction generated for atomic "
16164 <<
" operation at memory scope " << MemScope;
16169 Type *EltTy = VT->getElementType();
16170 return VT->getNumElements() == 2 &&
16190 unsigned BW =
IT->getBitWidth();
16191 return BW == 32 || BW == 64;
16205 unsigned BW =
DL.getPointerSizeInBits(PT->getAddressSpace());
16206 return BW == 32 || BW == 64;
16209 if (Ty->isFloatTy() || Ty->isDoubleTy())
16213 return VT->getNumElements() == 2 &&
16214 VT->getElementType()->getPrimitiveSizeInBits() == 16;
16224 bool HasSystemScope) {
16231 if (HasSystemScope) {
16238 return RMW->
hasMetadata(
"amdgpu.no.fine.grained.memory");
16264 bool HasSystemScope =
16308 if (Ty->isFloatTy()) {
16313 if (Ty->isDoubleTy()) {
16334 if (Ty->isFloatTy() &&
16370 if (Ty->isFloatTy()) {
16451 if (HasSystemScope)
16491 if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
16493 : &AMDGPU::SReg_32RegClass;
16494 if (!
TRI->isSGPRClass(RC) && !isDivergent)
16495 return TRI->getEquivalentSGPRClass(RC);
16496 if (
TRI->isSGPRClass(RC) && isDivergent)
16497 return TRI->getEquivalentVGPRClass(RC);
16509 unsigned WaveSize) {
16514 if (!
IT ||
IT->getBitWidth() != WaveSize)
16519 if (!Visited.
insert(V).second)
16521 bool Result =
false;
16522 for (
const auto *U : V->users()) {
16524 if (V == U->getOperand(1)) {
16525 switch (Intrinsic->getIntrinsicID()) {
16529 case Intrinsic::amdgcn_if_break:
16530 case Intrinsic::amdgcn_if:
16531 case Intrinsic::amdgcn_else:
16536 if (V == U->getOperand(0)) {
16537 switch (Intrinsic->getIntrinsicID()) {
16541 case Intrinsic::amdgcn_end_cf:
16542 case Intrinsic::amdgcn_loop:
16548 Result =
hasCFUser(U, Visited, WaveSize);
16557 const Value *V)
const {
16559 if (CI->isInlineAsm()) {
16568 for (
auto &TC : TargetConstraints) {
16572 SIRI, TC.ConstraintCode, TC.ConstraintVT).second;
16585 for (;
I !=
E; ++
I) {
16609 return MRI.hasOneNonDBGUse(N0);
16616 if (
I.getMetadata(
"amdgpu.noclobber"))
16618 if (
I.getMetadata(
"amdgpu.last.use"))
16628 if (!Def->isMachineOpcode())
16638 if (
II.isCompare() &&
II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
16639 PhysReg = AMDGPU::SCC;
16641 TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
16655 "this cannot be replaced with add");
16661 "target should have atomic fadd instructions");
16664 "generic atomicrmw expansion only supports FP32 operand in flat "
16711 bool ReturnValueIsUsed = !AI->
use_empty();
16730 std::prev(BB->
end())->eraseFromParent();
16731 Builder.SetInsertPoint(BB);
16732 CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
16733 {
Addr},
nullptr,
"is.shared");
16734 Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);
16736 Builder.SetInsertPoint(SharedBB);
16737 Value *CastToLocal = Builder.CreateAddrSpaceCast(
16746 Builder.CreateBr(PhiBB);
16748 Builder.SetInsertPoint(CheckPrivateBB);
16749 CallInst *IsPrivate = Builder.CreateIntrinsic(
16750 Intrinsic::amdgcn_is_private, {}, {
Addr},
nullptr,
"is.private");
16751 Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB);
16753 Builder.SetInsertPoint(PrivateBB);
16754 Value *CastToPrivate = Builder.CreateAddrSpaceCast(
16756 Value *LoadedPrivate = Builder.CreateAlignedLoad(ValTy, CastToPrivate,
16757 Alignment,
"loaded.private");
16761 Builder.CreateAlignedStore(NewVal, CastToPrivate, Alignment);
16762 Builder.CreateBr(PhiBB);
16764 Builder.SetInsertPoint(GlobalBB);
16765 Value *CastToGlobal = Builder.CreateAddrSpaceCast(
16767 Value *LoadedGlobal = AI;
16774 Builder.CreateBr(PhiBB);
16776 Builder.SetInsertPoint(PhiBB);
16778 if (ReturnValueIsUsed) {
16779 PHINode *Loaded = Builder.CreatePHI(ValTy, 3);
16787 Builder.CreateBr(ExitBB);
16802 LoadInst *LI = Builder.CreateAlignedLoad(
static bool isMul(MachineInstr *MI)
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, SmallVectorImpl< Register > &PackedAddrs, unsigned ArgOffset, const AMDGPU::ImageDimIntrinsicInfo *Intr, bool IsA16, bool IsG16)
Turn a set of s16 typed registers in AddrRegs into a dword sized vector with s16 typed elements.
static bool isKnownNonNull(Register Val, MachineRegisterInfo &MRI, const AMDGPUTargetMachine &TM, unsigned AddrSpace)
Return true if the value is a known valid address, such that a null check is not necessary.
Provides AMDGPU specific target descriptions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
iv Induction Variable Users
static const unsigned MaxDepth
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getAddressSpace(const Value *V, unsigned MaxLookup)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const SmallVectorImpl< MachineOperand > & Cond
const MachineOperand & RHS
static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
#define FP_DENORM_FLUSH_NONE
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
static void reservePrivateMemoryRegs(const TargetMachine &TM, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT, const SDLoc &DL, SelectionDAG &DAG, bool Unpacked)
static MachineBasicBlock * emitIndirectSrc(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF)
static bool isAtomicRMWLegalIntTy(Type *Ty)
static std::pair< unsigned, int > computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset)
static bool denormalModeIsFlushAllF32(const MachineFunction &MF)
static bool addresses16Bits(int Mask)
static bool isClampZeroToOne(SDValue A, SDValue B)
static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc, EVT VT)
static unsigned findFirstFreeSGPR(CCState &CCInfo)
static uint32_t getPermuteMask(SDValue V)
static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static int getAlignedAGPRClassID(unsigned UnalignedClassID)
static void processPSInputArgs(SmallVectorImpl< ISD::InputArg > &Splits, CallingConv::ID CallConv, ArrayRef< ISD::InputArg > Ins, BitVector &Skipped, FunctionType *FType, SIMachineFunctionInfo *Info)
static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG, const GCNSubtarget *Subtarget)
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG, ISD::LoadExtType ExtType, SDValue Op, const SDLoc &SL, EVT VT)
static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget, const AtomicRMWInst *RMW, bool HasSystemScope)
static void fixMasks(SmallVectorImpl< DotSrc > &Srcs, unsigned ChainLength)
static TargetLowering::AtomicExpansionKind atomicSupportedIfLegalIntType(const AtomicRMWInst *RMW)
static SDValue strictFPExtFromF16(SelectionDAG &DAG, SDValue Src)
Return the source of an fp_extend from f16 to f32, or a converted FP constant.
static bool isAtomicRMWLegalXChgTy(const AtomicRMWInst *RMW)
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val)
static cl::opt< bool > DisableLoopAlignment("amdgpu-disable-loop-alignment", cl::desc("Do not align and prefetch loops"), cl::init(false))
static SDValue getDWordFromOffset(SelectionDAG &DAG, SDLoc SL, SDValue Src, unsigned DWordOffset)
static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, unsigned InitResultReg, unsigned PhiReg, int Offset, bool UseGPRIdxMode, Register &SGPRIdxReg)
static bool isImmConstraint(StringRef Constraint)
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT, SDValue Src, int ExtraElts)
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasCFUser(const Value *V, SmallPtrSet< const Value *, 16 > &Visited, unsigned WaveSize)
static OptimizationRemark emitAtomicRMWLegalRemark(const AtomicRMWInst *RMW)
static unsigned SubIdx2Lane(unsigned Idx)
Helper function for adjustWritemask.
static bool addressMayBeAccessedAsPrivate(const MachineMemOperand *MMO, const SIMachineFunctionInfo &Info)
static MachineBasicBlock * lowerWaveReduce(MachineInstr &MI, MachineBasicBlock &BB, const GCNSubtarget &ST, unsigned Opc)
static bool elementPairIsContiguous(ArrayRef< int > Mask, int Elt)
static bool isV2BF16(Type *Ty)
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo, const TargetRegisterClass *RC, unsigned NumArgRegs)
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue N0, SDValue N1, SDValue N2, bool Signed)
static SDValue resolveSources(SelectionDAG &DAG, SDLoc SL, SmallVectorImpl< DotSrc > &Srcs, bool IsSigned, bool IsAny)
static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op, SDValue &OtherOp)
static void placeSources(ByteProvider< SDValue > &Src0, ByteProvider< SDValue > &Src1, SmallVectorImpl< DotSrc > &Src0s, SmallVectorImpl< DotSrc > &Src1s, int Step)
static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI, const DataLayout &DL, Type *Ty, unsigned MaxNumLanes)
static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, const DebugLoc &DL, const MachineOperand &Idx, unsigned InitReg, unsigned ResultReg, unsigned PhiReg, unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode, Register &SGPRIdxReg)
static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isFrameIndexOp(SDValue Op)
static ConstantFPSDNode * getSplatConstantFP(SDValue Op)
static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg)
static bool isExtendedFrom16Bits(SDValue &Operand)
static std::optional< bool > checkDot4MulSignedness(const SDValue &N, ByteProvider< SDValue > &Src0, ByteProvider< SDValue > &Src1, const SDValue &S0Op, const SDValue &S1Op, const SelectionDAG &DAG)
static bool vectorEltWillFoldAway(SDValue Op)
static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG, const SIMachineFunctionInfo *Info, const GCNSubtarget *ST)
static uint32_t getConstantPermuteMask(uint32_t C)
static MachineBasicBlock * emitIndirectDst(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
static void setM0ToIndexFromSGPR(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset)
static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask=~0u, ArgDescriptor Arg=ArgDescriptor())
static std::pair< MachineBasicBlock *, MachineBasicBlock * > splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop)
static unsigned getBasePtrIndex(const MemSDNode *N)
MemSDNode::getBasePtr() does not work for intrinsics, which needs to offset by the chain and intrinsi...
static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB, KnownBits &Known, unsigned Dim)
static LLVM_ATTRIBUTE_UNUSED bool isCopyFromRegOfInlineAsm(const SDNode *N)
static void allocateFixedSGPRInputImpl(CCState &CCInfo, const TargetRegisterClass *RC, MCRegister Reg)
static SDValue constructRetValue(SelectionDAG &DAG, MachineSDNode *Result, ArrayRef< EVT > ResultTypes, bool IsTexFail, bool Unpacked, bool IsD16, int DMaskPop, int NumVDataDwords, bool IsAtomicPacked16Bit, const SDLoc &DL)
static std::optional< ByteProvider< SDValue > > handleMulOperand(const SDValue &MulOperand)
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static Register getIndirectSGPRIdx(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset)
static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
static EVT memVTFromLoadIntrData(const SITargetLowering &TLI, const DataLayout &DL, Type *Ty, unsigned MaxNumLanes)
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc)
static unsigned getExtOpcodeForPromotedOp(SDValue Op)
static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, uint64_t Val)
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL, ArrayRef< SDValue > Elts)
static SDNode * findUser(SDValue Value, unsigned Opcode)
Helper function for LowerBRCOND.
static unsigned addPermMasks(unsigned First, unsigned Second)
static uint64_t clearUnusedBits(uint64_t Val, unsigned Size)
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue C, SDValue GlueChain, SDNodeFlags Flags)
static bool isV2F16OrV2BF16(Type *Ty)
static bool atomicIgnoresDenormalModeOrFPModeIsFTZ(const AtomicRMWInst *RMW)
static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue GlueChain, SDNodeFlags Flags)
static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, const SDLoc &DL, int64_t Offset, EVT PtrVT, unsigned GAFlags=SIInstrInfo::MO_NONE)
static cl::opt< bool > UseDivergentRegisterIndexing("amdgpu-use-divergent-register-indexing", cl::Hidden, cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false))
static const std::optional< ByteProvider< SDValue > > calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex=0, unsigned Depth=0)
static bool isV2F16(Type *Ty)
static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg)
SI DAG Lowering interface definition.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static constexpr int Concat[]
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo)
static bool isUniformMMO(const MachineMemOperand *MMO)
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
uint32_t getLDSSize() const
void setUsesDynamicLDS(bool DynLDS)
void setDynLDSAlign(const Function &F, const GlobalVariable &GV)
bool isEntryFunction() const
bool hasMadMacF32Insts() const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
bool hasMadMixInsts() const
unsigned getWavefrontSizeLog2() const
bool has16BitInsts() const
bool isAmdHsaOrMesa(const Function &F) const
bool hasFastFMAF32() const
bool hasTrigReducedRange() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
bool hasVOP3PInsts() const
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const
static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, SDNodeFlags Flags)
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false,.
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const
static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc)
bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Widen a suitably aligned v3 load.
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
An instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
static unsigned getPointerOperandIndex()
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
void setOperation(BinOp Operation)
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This is an SDNode representing atomic operations.
bool isCompareAndSwap() const
Returns true if this SDNode represents cmpxchg atomic operation, false otherwise.
This class holds the attributes for a function, its return value, and its parameters.
MemoryEffects getMemoryEffects() const
Returns memory effects of the function.
bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
std::optional< ISelOp > Src
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isAllocated(MCRegister Reg) const
isAllocated - Return true if the specified register (or an alias) is allocated.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
bool isIntPredicate() const
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isInfinity() const
Return true if the value is an infinity.
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
iterator_range< arg_iterator > args()
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasD16Images() const
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool hasImageStoreD16Bug() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasDot7Insts() const
bool hasApertureRegs() const
bool hasFlatInstOffsets() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasBCNT(unsigned Size) const
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasUnalignedDSAccessEnabled() const
const SIInstrInfo * getInstrInfo() const override
bool hasDot1Insts() const
bool hasAtomicFaddRtnInsts() const
Align getStackAlignment() const
bool hasScalarSubwordLoads() const
bool enableFlatScratch() const
bool hasDwordx3LoadStores() const
bool hasFlatScrRegister() const
bool supportsGetDoorbellID() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasLDSMisalignedBug() const
bool hasUserSGPRInit16Bug() const
TrapHandlerAbi getTrapHandlerAbi() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasGFX10_AEncoding() const
bool hasPackedFP32Ops() const
bool hasFullRate64Ops() const
bool isTrapHandlerEnabled() const
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool getScalarizeGlobalBehavior() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasShaderCyclesHiLoRegisters() const
bool hasNSAEncoding() const
bool hasSMemRealTime() const
bool usePRTStrictNull() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasAtomicFlatPkAdd16Insts() const
bool hasUnalignedBufferAccessEnabled() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasImageGather4D16Bug() const
bool supportsMinMaxDenormModes() const
bool hasAtomicFaddInsts() const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasAtomicBufferPkAddBF16Inst() const
bool hasAtomicFaddNoRtnInsts() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool hasScalarDwordx3Loads() const
bool hasLDSFPAtomicAddF32() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDot8Insts() const
bool hasDS96AndDS128() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
bool hasScalarAddSub64() const
bool hasIEEEMinMax3() const
bool hasUnpackedD16VMem() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
bool hasPackedTID() const
bool hasAddNoCarry() const
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
unsigned getNumFreeUserSGPRs()
bool hasImplicitBufferPtr() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
virtual void computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth=0)
const MachineFunction & getMachineFunction() const
int64_t getOffset() const
unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
bool hasExternalLinkage() const
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
uint64_t getScalarSizeInBits() const
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Align getAlignment() const
Return alignment of the basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasCalls() const
Return true if the current function has any function calls.
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
bool hasStackObjects() const
Return true if there are any stack objects in this function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setType(Register VReg, LLT Ty)
Set the low-level type of VReg to Ty.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool hasWorkGroupIDZ() const
SIModeRegisterDefaults getMode() const
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
unsigned getBytesInStackArgArea() const
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isVGPRClass(const TargetRegisterClass *RC)
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDNode * PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override
Fold the instructions after selecting them.
SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const
MachineSDNode * wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const
bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, unsigned &PhysReg, int &Cost) const override
Allows the target to handle physreg-carried dependency in target-specific way.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool requiresUniformRegister(MachineFunction &MF, const Value *V) const override
Allows target to decide about the register class of the specific value that is live outside the defin...
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override
Returns true if N can be combined with other nodes in DAG to form an ISD::FMAD.
AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void bundleInstWithWaitcnt(MachineInstr &MI) const
Insert MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override
Map address space 7 to MVT::v5i32 because that's its in-memory representation.
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
SDNode * legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const
Legalize target independent instructions (e.g.
bool allowsMisalignedMemoryAccessesImpl(unsigned Size, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const
const GCNSubtarget * getSubtarget() const
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
bool shouldEmitGOTReloc(const GlobalValue *GV) const
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const
SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const
SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
void allocateHSAUserSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override
Return the register class that should be used for the specified value type.
void AddMemOpInit(MachineInstr &MI) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool isLegalGlobalAddressingMode(const AddrMode &AM) const
void computeKnownBitsForFrameIndex(int FrameIdx, KnownBits &Known, const MachineFunction &MF) const override
Determine which of the bits of FrameIndex FIOp are known to be 0.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const
bool isShuffleMaskLegal(ArrayRef< int >, EVT) const override
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
LLT getPreferredShiftAmountTy(LLT Ty) const override
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
MachineBasicBlock * emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const
bool checkAsmConstraintValA(SDValue Op, uint64_t Val, unsigned MaxSize=64) const
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool shouldEmitFixup(const GlobalValue *GV) const
MachineBasicBlock * splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const
bool hasMemSDNodeUser(SDNode *N) const
bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, UniformityInfo *UA) const override
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SmallVectorImpl< ISD::InputArg > &Ins, SelectionDAG &DAG) const
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const
bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const
SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals, bool isThisReturn, SDValue ThisVal) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns whether Op is known to never be a signaling NaN.
AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation input to an Opcode operation is free (for instance,...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
Assign the register class depending on the number of bits set in the writemask.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void allocateSpecialInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments at the end of allocated user arguments.
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static bool isNonGlobalAddrSpace(unsigned AS)
MachineSDNode * buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const
Return a resource descriptor with the 'Add TID' bit enabled The TID (Thread ID) is multiplied by the ...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool mayBeEmittedAsTailCall(const CallInst *) const override
Return true if the target may be able to emit the call instruction as a tail call.
void passSpecialInputs(CallLoweringInfo &CLI, CCState &CCInfo, const SIMachineFunctionInfo &Info, SmallVectorImpl< std::pair< unsigned, SDValue > > &RegsToPass, SmallVectorImpl< SDValue > &MemOpChains, SDValue Chain) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool checkAsmConstraintVal(SDValue Op, StringRef Constraint, uint64_t Val) const
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
bool shouldUseLDSConstAddress(const GlobalValue *GV) const
bool supportSplitCSR(MachineFunction *MF) const override
Return true if the target supports that a subset of CSRs for the given machine function is handled ex...
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
LLT handling variant.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI)
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
bool shouldEmitPCReloc(const GlobalValue *GV) const
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocatePreloadKernArgSGPRs(CCState &CCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ISD::InputArg > &Ins, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, MachineFunction &MF, unsigned IntrinsicID) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const
bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const override
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
Determine the known alignment for the pointer value R.
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override
Similarly, the in-memory representation of a p7 is {p8, i32}, aka v8i32 when padding is added.
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
const Pass * getPass() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by first bitcasting (from potentia...
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
unsigned getID() const
Return the register class ID number.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
int getCopyCost() const
Return the cost of copying a value between two registers in this class.
iterator begin() const
begin/end - Return all of the registers in this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isFunctionTy() const
True if this is an instance of FunctionType.
bool isIntegerTy() const
True if this is an instance of IntegerType.
const fltSemantics & getFltSemantics() const
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
void takeName(Value *V)
Transfer the name from V to this value.
Type * getElementType() const
constexpr bool isZero() const
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ STREAMOUT_REGISTER
Internal address spaces. Can be freely renumbered.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
@ BUFFER_ATOMIC_COND_SUB_U32
@ TBUFFER_LOAD_FORMAT_D16
@ TBUFFER_STORE_FORMAT_D16
@ BUFFER_STORE_FORMAT_D16
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isFlatGlobalAddrSpace(unsigned AS)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
const uint64_t FltRoundToHWConversionTable
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
bool isGFX11(const MCSubtargetInfo &STI)
bool isCompute(CallingConv::ID cc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isChainCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool getMUBUFTfe(unsigned Opc)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isShader(CallingConv::ID cc)
bool isGFX10Plus(const MCSubtargetInfo &STI)
LLVM_READONLY int getVOPe64(uint16_t Opcode)
@ TowardZeroF32_TowardNegativeF64
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds)
Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
bool isExtendedGlobalAddrSpace(unsigned AS)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
bool isGraphics(CallingConv::ID cc)
const uint64_t FltRoundConversionTable
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ MaxID
The highest possible ID. Must be some 2^k - 1.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ UNDEF
UNDEF - An undefined node.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ SMULO
Same for multiplication.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred)
getICmpCondCode - Return the ISD condition code corresponding to the given LLVM IR integer condition ...
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
testing::Matcher< const detail::ErrorHolder & > Failed()
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool isReleaseOrStronger(AtomicOrdering AO)
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
unsigned getUndefRegState(bool B)
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
ArrayRef(const T &OneElt) -> ArrayRef< T >
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
ArgDescriptor WorkItemIDZ
ArgDescriptor WorkItemIDY
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
ArgDescriptor WorkItemIDX
static constexpr uint64_t encode(Fields... Values)
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
static ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
MCRegister getRegister() const
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
Represent subnormal handling kind for floating point instruction inputs and outputs.
@ Dynamic
Denormals have unknown treatment.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
EVT changeElementType(EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
uint64_t getScalarSizeInBits() const
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
unsigned getPointerAddrSpace() const
unsigned getByValSize() const
bool isUnknown() const
Returns true if we don't know any bits.
void resetAll()
Resets the known state of all bits.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasAllowContract() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
DenormalMode FP64FP16Denormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
DenormalMode FP32Denormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SDValue ConvergenceControlToken
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
bool isBeforeLegalize() const